Merge tag 'devicetree-fixes-for-5.11-1' of git://git.kernel.org/pub/scm/linux/kernel...
author		Linus Torvalds <torvalds@linux-foundation.org>
		Thu, 24 Dec 2020 20:09:48 +0000 (12:09 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
		Thu, 24 Dec 2020 20:09:48 +0000 (12:09 -0800)
Pull devicetree fixes from Rob Herring:

 - Correct the JSON pointer syntax in binding schemas

 - Drop unnecessary *-supply schema constraints

 - Drop redundant maxItems/items on array schemas

 - Fix various yamllint warnings

 - Fix various missing 'additionalProperties' properties
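
A hypothetical binding fragment (not taken from any file in this merge;
all property and clock names are made up) sketches the before/after
shape of these schema fixes:

    # Before: redundant constraints, an unnecessary *-supply schema,
    # and a malformed JSON pointer (missing '/' after '#')
    properties:
      clocks:
        minItems: 2
        maxItems: 2     # redundant: implied by the fixed-length items list
        items:
          - description: bus clock
          - description: core clock
      vdd-supply:
        maxItems: 1     # unnecessary: a *-supply is already a single phandle
      vendor,example-prop:
        $ref: /schemas/types.yaml#definitions/uint32

    # After: minimal constraints, a valid JSON pointer, and undocumented
    # properties rejected explicitly
    properties:
      clocks:
        items:
          - description: bus clock
          - description: core clock
      vdd-supply: true
      vendor,example-prop:
        $ref: /schemas/types.yaml#/definitions/uint32

    additionalProperties: false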

* tag 'devicetree-fixes-for-5.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux:
  dt-bindings: Drop redundant maxItems/items
  dt-bindings: net: qcom,ipa: Drop unnecessary type ref on 'memory-region'
  dt-bindings: Drop unnecessary *-supply schemas properties
  dt-bindings/display: abt,y030xx067a: Fix binding
  dt-bindings: clock: imx8qxp-lpcg: eliminate yamllint warnings
  dt-bindings: display: eliminate yamllint warnings
  dt-bindings: media: nokia,smia: eliminate yamllint warnings
  dt-bindings: devapc: add the required property 'additionalProperties'
  dt-bindings: soc: add the required property 'additionalProperties'
  dt-bindings: serial: add the required property 'additionalProperties'
  dt-bindings: xlnx,vcu-settings: fix dt_binding_check warnings
  media: dt-bindings: coda: Add missing 'additionalProperties'
  dt-bindings: Fix JSON pointers

1417 files changed:
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/admin-guide/device-mapper/verity.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/perf-security.rst
Documentation/arm/memory.rst
Documentation/arm64/memory.rst
Documentation/dev-tools/kasan.rst
Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/axi-clkgen.txt [deleted file]
Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt [deleted file]
Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/display/panel/panel-simple.yaml
Documentation/devicetree/bindings/gpio/gpio-pca95xx.yaml
Documentation/devicetree/bindings/gpio/gpio-xilinx.txt
Documentation/devicetree/bindings/gpio/mediatek,mt7621-gpio.txt [deleted file]
Documentation/devicetree/bindings/gpio/mediatek,mt7621-gpio.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/gpio/mstar,msc313-gpio.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/i3c/mipi-i3c-hci.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/power/reset/ocelot-reset.txt
Documentation/devicetree/bindings/power/reset/regulator-poweroff.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt [deleted file]
Documentation/devicetree/bindings/pwm/intel,keembay-pwm.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/pwm/intel,lgm-pwm.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/pwm/pwm-mediatek.txt
Documentation/devicetree/bindings/pwm/pwm-mtk-disp.txt
Documentation/devicetree/bindings/rtc/rtc.yaml
Documentation/devicetree/bindings/soc/microchip/atmel,at91rm9200-tcb.yaml
Documentation/devicetree/bindings/vendor-prefixes.yaml
Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
Documentation/devicetree/bindings/watchdog/snps,dw-wdt.yaml
Documentation/driver-api/dma-buf.rst
Documentation/driver-api/driver-model/devres.rst
Documentation/driver-api/gpio/consumer.rst
Documentation/driver-api/gpio/driver.rst
Documentation/features/debug/KASAN/arch-support.txt
Documentation/features/time/irq-time-acct/arch-support.txt
Documentation/filesystems/gfs2.rst
Documentation/kbuild/kconfig-language.rst
Documentation/kbuild/kconfig-macro-language.rst
Documentation/kbuild/makefiles.rst
Documentation/kbuild/modules.rst
Documentation/sphinx/parse-headers.pl
Documentation/target/tcm_mod_builder.py
Documentation/trace/postprocess/decode_msr.py
Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
Documentation/trace/postprocess/trace-vmscan-postprocess.pl
Documentation/virt/kvm/api.rst
Documentation/virt/kvm/arm/pvtime.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/kernel/rtc.c
arch/alpha/kernel/syscalls/syscall.tbl
arch/arm/Kconfig
arch/arm/Makefile
arch/arm/boot/compressed/Makefile
arch/arm/boot/compressed/head.S
arch/arm/boot/compressed/string.c
arch/arm/include/asm/assembler.h
arch/arm/include/asm/div64.h
arch/arm/include/asm/elf.h
arch/arm/include/asm/fixmap.h
arch/arm/include/asm/io.h
arch/arm/include/asm/kasan.h [new file with mode: 0644]
arch/arm/include/asm/kasan_def.h [new file with mode: 0644]
arch/arm/include/asm/memory.h
arch/arm/include/asm/pgalloc.h
arch/arm/include/asm/pgtable-2level.h
arch/arm/include/asm/processor.h
arch/arm/include/asm/prom.h
arch/arm/include/asm/string.h
arch/arm/include/asm/thread_info.h
arch/arm/include/asm/uaccess-asm.h
arch/arm/kernel/Makefile
arch/arm/kernel/atags.h
arch/arm/kernel/atags_parse.c
arch/arm/kernel/devtree.c
arch/arm/kernel/entry-armv.S
arch/arm/kernel/entry-common.S
arch/arm/kernel/head-common.S
arch/arm/kernel/head.S
arch/arm/kernel/hyp-stub.S
arch/arm/kernel/iwmmxt.S
arch/arm/kernel/iwmmxt.h [new file with mode: 0644]
arch/arm/kernel/module.c
arch/arm/kernel/phys2virt.S [new file with mode: 0644]
arch/arm/kernel/setup.c
arch/arm/kernel/sleep.S
arch/arm/kernel/smp.c
arch/arm/kernel/unwind.c
arch/arm/lib/memcpy.S
arch/arm/lib/memmove.S
arch/arm/lib/memset.S
arch/arm/mach-s3c/mach-h1940.c
arch/arm/mach-s3c/mach-rx1950.c
arch/arm/mach-sa1100/collie.c
arch/arm/mm/Kconfig
arch/arm/mm/Makefile
arch/arm/mm/init.c
arch/arm/mm/kasan_init.c [new file with mode: 0644]
arch/arm/mm/mmap.c
arch/arm/mm/mmu.c
arch/arm/mm/pgd.c
arch/arm/mm/pv-fixup-asm.S
arch/arm/tools/syscall.tbl
arch/arm/vdso/Makefile
arch/arm/vfp/entry.S
arch/arm/vfp/vfphw.S
arch/arm/vfp/vfpmodule.c
arch/arm64/Kconfig
arch/arm64/Makefile
arch/arm64/include/asm/assembler.h
arch/arm64/include/asm/cache.h
arch/arm64/include/asm/cpucaps.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/el2_setup.h [new file with mode: 0644]
arch/arm64/include/asm/esr.h
arch/arm64/include/asm/exception.h
arch/arm64/include/asm/io.h
arch/arm64/include/asm/kasan.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_coproc.h [deleted file]
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_hyp.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/mmu.h
arch/arm64/include/asm/mte-def.h [new file with mode: 0644]
arch/arm64/include/asm/mte-kasan.h [new file with mode: 0644]
arch/arm64/include/asm/mte.h
arch/arm64/include/asm/percpu.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/sections.h
arch/arm64/include/asm/smp.h
arch/arm64/include/asm/spectre.h
arch/arm64/include/asm/string.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/uaccess.h
arch/arm64/include/asm/unistd.h
arch/arm64/include/asm/unistd32.h
arch/arm64/include/asm/virt.h
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/kernel/asm-offsets.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/entry.S
arch/arm64/kernel/head.S
arch/arm64/kernel/hibernate.c
arch/arm64/kernel/image-vars.h
arch/arm64/kernel/kaslr.c
arch/arm64/kernel/module.c
arch/arm64/kernel/mte.c
arch/arm64/kernel/proton-pack.c
arch/arm64/kernel/setup.c
arch/arm64/kernel/sleep.S
arch/arm64/kernel/smp.c
arch/arm64/kernel/topology.c
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/kvm/Makefile
arch/arm64/kvm/aarch32.c [deleted file]
arch/arm64/kvm/arm.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/Makefile
arch/arm64/kvm/hyp/aarch32.c
arch/arm64/kvm/hyp/exception.c [new file with mode: 0644]
arch/arm64/kvm/hyp/hyp-entry.S
arch/arm64/kvm/hyp/include/hyp/adjust_pc.h [new file with mode: 0644]
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/include/nvhe/trap_handler.h [new file with mode: 0644]
arch/arm64/kvm/hyp/nvhe/Makefile
arch/arm64/kvm/hyp/nvhe/host.S
arch/arm64/kvm/hyp/nvhe/hyp-init.S
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/nvhe/hyp-smp.c [new file with mode: 0644]
arch/arm64/kvm/hyp/nvhe/hyp.lds.S
arch/arm64/kvm/hyp/nvhe/psci-relay.c [new file with mode: 0644]
arch/arm64/kvm/hyp/nvhe/switch.c
arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
arch/arm64/kvm/hyp/smccc_wa.S [deleted file]
arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
arch/arm64/kvm/hyp/vgic-v3-sr.c
arch/arm64/kvm/hyp/vhe/Makefile
arch/arm64/kvm/hyp/vhe/switch.c
arch/arm64/kvm/inject_fault.c
arch/arm64/kvm/mmio.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/pvtime.c
arch/arm64/kvm/regmap.c [deleted file]
arch/arm64/kvm/reset.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/sys_regs.h
arch/arm64/kvm/va_layout.c
arch/arm64/kvm/vgic-sys-reg-v3.c
arch/arm64/kvm/vgic/vgic-v4.c
arch/arm64/kvm/vgic/vgic.c
arch/arm64/lib/mte.S
arch/arm64/mm/copypage.c
arch/arm64/mm/fault.c
arch/arm64/mm/init.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/mmap.c
arch/arm64/mm/mteswap.c
arch/arm64/mm/proc.S
arch/arm64/mm/ptdump.c
arch/ia64/kernel/syscalls/syscall.tbl
arch/ia64/scripts/unwcheck.py
arch/m68k/68000/Makefile
arch/m68k/68000/dragen2.c [new file with mode: 0644]
arch/m68k/68000/m68328.c
arch/m68k/68000/m68328.h [new file with mode: 0644]
arch/m68k/68000/m68EZ328.c [deleted file]
arch/m68k/68000/m68VZ328.c [deleted file]
arch/m68k/68000/ucsimm.c [new file with mode: 0644]
arch/m68k/Kconfig.cpu
arch/m68k/Kconfig.machine
arch/m68k/kernel/setup_no.c
arch/m68k/kernel/syscalls/syscall.tbl
arch/m68k/kernel/vmlinux-nommu.lds
arch/microblaze/kernel/syscalls/syscall.tbl
arch/mips/kernel/syscalls/syscall_n32.tbl
arch/mips/kernel/syscalls/syscall_n64.tbl
arch/mips/kernel/syscalls/syscall_o32.tbl
arch/parisc/kernel/syscalls/syscall.tbl
arch/powerpc/Kconfig
arch/powerpc/kernel/dma-iommu.c
arch/powerpc/kernel/syscalls/syscall.tbl
arch/powerpc/platforms/pseries/iommu.c
arch/riscv/Kconfig
arch/riscv/Kconfig.socs
arch/riscv/Makefile
arch/riscv/boot/.gitignore
arch/riscv/boot/Makefile
arch/riscv/include/asm/sections.h
arch/riscv/include/asm/set_memory.h
arch/riscv/include/asm/stacktrace.h [new file with mode: 0644]
arch/riscv/include/asm/string.h
arch/riscv/kernel/Makefile
arch/riscv/kernel/asm-offsets.c
arch/riscv/kernel/head.S
arch/riscv/kernel/perf_callchain.c
arch/riscv/kernel/riscv_ksyms.c
arch/riscv/kernel/setup.c
arch/riscv/kernel/stacktrace.c
arch/riscv/kernel/vmlinux.lds.S
arch/riscv/lib/Makefile
arch/riscv/lib/memmove.S [new file with mode: 0644]
arch/riscv/mm/init.c
arch/riscv/mm/pageattr.c
arch/s390/Kconfig
arch/s390/boot/string.c
arch/s390/include/asm/delay.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/processor.h
arch/s390/kernel/entry.S
arch/s390/kernel/idle.c
arch/s390/kernel/ipl.c
arch/s390/kernel/setup.c
arch/s390/kernel/syscalls/syscall.tbl
arch/s390/kvm/guestdbg.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/priv.c
arch/s390/kvm/pv.c
arch/s390/kvm/vsie.c
arch/s390/lib/delay.c
arch/s390/lib/test_unwind.c
arch/s390/mm/gmap.c
arch/sh/boards/of-generic.c
arch/sh/kernel/syscalls/syscall.tbl
arch/sparc/kernel/syscalls/syscall.tbl
arch/um/Kconfig
arch/um/drivers/chan_user.c
arch/um/drivers/line.c
arch/um/drivers/mconsole_kern.c
arch/um/drivers/net_kern.c
arch/um/drivers/port_kern.c
arch/um/drivers/random.c
arch/um/drivers/ubd_kern.c
arch/um/drivers/vector_kern.c
arch/um/drivers/virtio_uml.c
arch/um/drivers/xterm.c
arch/um/drivers/xterm_kern.c
arch/um/include/asm/irq.h
arch/um/include/asm/pgtable.h
arch/um/include/asm/set_memory.h [new file with mode: 0644]
arch/um/include/linux/time-internal.h
arch/um/include/shared/common-offsets.h
arch/um/include/shared/irq_kern.h
arch/um/include/shared/irq_user.h
arch/um/include/shared/kern_util.h
arch/um/include/shared/os.h
arch/um/kernel/irq.c
arch/um/kernel/process.c
arch/um/kernel/sigio.c
arch/um/kernel/time.c
arch/um/kernel/tlb.c
arch/um/kernel/um_arch.c
arch/um/os-Linux/Makefile
arch/um/os-Linux/helper.c
arch/um/os-Linux/irq.c
arch/um/os-Linux/sigio.c
arch/um/os-Linux/signal.c
arch/um/os-Linux/skas/process.c
arch/um/os-Linux/time.c
arch/um/os-Linux/umid.c
arch/x86/boot/compressed/misc.h
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/syscalls/syscall_64.tbl
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/vmx.h
arch/x86/include/asm/xen/page.h
arch/x86/include/uapi/asm/kvm.h
arch/x86/include/uapi/asm/svm.h
arch/x86/include/uapi/asm/vmx.h
arch/x86/kernel/acpi/wakeup_64.S
arch/x86/kernel/cpu/cpuid-deps.c
arch/x86/kernel/cpu/scattered.c
arch/x86/kernel/cpu/vmware.c
arch/x86/kernel/kvmclock.c
arch/x86/kernel/traps.c
arch/x86/kvm/Kconfig
arch/x86/kvm/Makefile
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/hyperv.c
arch/x86/kvm/hyperv.h
arch/x86/kvm/kvm_cache_regs.h
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmutrace.h
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/mtrr.c
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/svm/vmenter.S
arch/x86/kvm/trace.h
arch/x86/kvm/vmx/evmcs.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmenter.S
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/xen/Kconfig
arch/x86/xen/p2m.c
arch/xtensa/kernel/syscalls/syscall.tbl
arch/xtensa/kernel/time.c
drivers/acpi/Makefile
drivers/acpi/acpi_pnp.c
drivers/acpi/cppc_acpi.c
drivers/acpi/processor_perflib.c
drivers/acpi/scan.c
drivers/acpi/sleep.c
drivers/acpi/sleep.h
drivers/acpi/x86/s2idle.c [new file with mode: 0644]
drivers/block/xen-blkfront.c
drivers/char/agp/Makefile
drivers/char/agp/agp.h
drivers/char/hw_random/Kconfig
drivers/clk/Kconfig
drivers/clk/Makefile
drivers/clk/at91/at91rm9200.c
drivers/clk/at91/at91sam9260.c
drivers/clk/at91/at91sam9g45.c
drivers/clk/at91/at91sam9n12.c
drivers/clk/at91/at91sam9rl.c
drivers/clk/at91/at91sam9x5.c
drivers/clk/at91/clk-master.c
drivers/clk/at91/clk-sam9x60-pll.c
drivers/clk/at91/dt-compat.c
drivers/clk/at91/pmc.h
drivers/clk/at91/sam9x60.c
drivers/clk/at91/sama5d2.c
drivers/clk/at91/sama5d3.c
drivers/clk/at91/sama5d4.c
drivers/clk/at91/sama7g5.c
drivers/clk/bcm/clk-bcm2711-dvp.c
drivers/clk/clk-axi-clkgen.c
drivers/clk/clk-composite.c
drivers/clk/clk-divider.c
drivers/clk/clk-fsl-flexspi.c [new file with mode: 0644]
drivers/clk/clk-fsl-sai.c
drivers/clk/clk-pwm.c
drivers/clk/clk-qoriq.c
drivers/clk/clk-s2mps11.c
drivers/clk/clk-scpi.c
drivers/clk/clk-si5351.c
drivers/clk/clk-versaclock5.c
drivers/clk/clk.c
drivers/clk/imx/clk-gate2.c
drivers/clk/imx/clk-imx8mm.c
drivers/clk/imx/clk-imx8mn.c
drivers/clk/imx/clk-imx8mp.c
drivers/clk/imx/clk-imx8mq.c
drivers/clk/imx/clk-imx8qxp-lpcg.c
drivers/clk/imx/clk-imx8qxp.c
drivers/clk/imx/clk-lpcg-scu.c
drivers/clk/imx/clk-pll14xx.c
drivers/clk/imx/clk-scu.c
drivers/clk/imx/clk-scu.h
drivers/clk/imx/clk.h
drivers/clk/ingenic/cgu.c
drivers/clk/mediatek/clk-mux.c
drivers/clk/mediatek/clk-mux.h
drivers/clk/meson/Kconfig
drivers/clk/meson/axg-aoclk.c
drivers/clk/meson/axg.c
drivers/clk/meson/axg.h
drivers/clk/meson/g12a-aoclk.c
drivers/clk/meson/g12a.c
drivers/clk/meson/g12a.h
drivers/clk/meson/gxbb-aoclk.c
drivers/clk/meson/gxbb.c
drivers/clk/meson/meson-aoclk.c
drivers/clk/meson/meson-eeclk.c
drivers/clk/mvebu/armada-37xx-xtal.c
drivers/clk/qcom/Kconfig
drivers/clk/qcom/Makefile
drivers/clk/qcom/camcc-sc7180.c [new file with mode: 0644]
drivers/clk/qcom/clk-alpha-pll.c
drivers/clk/qcom/clk-alpha-pll.h
drivers/clk/qcom/clk-rpmh.c
drivers/clk/qcom/dispcc-sm8250.c
drivers/clk/qcom/gcc-sc7180.c
drivers/clk/qcom/gcc-sdx55.c [new file with mode: 0644]
drivers/clk/qcom/lpass-gfm-sm8250.c [new file with mode: 0644]
drivers/clk/qcom/lpasscorecc-sc7180.c
drivers/clk/renesas/clk-sh73a0.c
drivers/clk/renesas/r8a774a1-cpg-mssr.c
drivers/clk/renesas/r8a774b1-cpg-mssr.c
drivers/clk/renesas/r8a774c0-cpg-mssr.c
drivers/clk/renesas/r8a779a0-cpg-mssr.c
drivers/clk/renesas/rcar-gen3-cpg.c
drivers/clk/renesas/rcar-gen3-cpg.h
drivers/clk/renesas/rcar-usb2-clock-sel.c
drivers/clk/renesas/renesas-cpg-mssr.c
drivers/clk/rockchip/Kconfig
drivers/clk/rockchip/clk-rk3188.c
drivers/clk/rockchip/clk.c
drivers/clk/samsung/Kconfig
drivers/clk/samsung/Makefile
drivers/clk/samsung/clk-pll.c
drivers/clk/sifive/Kconfig
drivers/clk/sifive/Makefile
drivers/clk/sifive/fu540-prci.c
drivers/clk/sifive/fu540-prci.h [new file with mode: 0644]
drivers/clk/sifive/fu740-prci.c [new file with mode: 0644]
drivers/clk/sifive/fu740-prci.h [new file with mode: 0644]
drivers/clk/sifive/sifive-prci.c [new file with mode: 0644]
drivers/clk/sifive/sifive-prci.h [new file with mode: 0644]
drivers/clk/sunxi-ng/ccu-sun50i-a64.c
drivers/clk/sunxi-ng/ccu-sun8i-h3.c
drivers/clk/tegra/clk-bpmp.c
drivers/clk/tegra/clk-dfll.c
drivers/clk/tegra/clk-id.h
drivers/clk/tegra/clk-tegra-periph.c
drivers/clk/ti/clk-54xx.c
drivers/clk/ti/fapll.c
drivers/cpufreq/cppc_cpufreq.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/intel_pstate.c
drivers/dma-buf/dma-buf.c
drivers/dma-buf/dma-resv.c
drivers/dma-buf/heaps/Makefile
drivers/dma-buf/heaps/cma_heap.c
drivers/dma-buf/heaps/heap-helpers.c [deleted file]
drivers/dma-buf/heaps/heap-helpers.h [deleted file]
drivers/dma-buf/heaps/system_heap.c
drivers/firmware/psci/psci.c
drivers/gpio/Kconfig
drivers/gpio/Makefile
drivers/gpio/TODO
drivers/gpio/gpio-104-idi-48.c
drivers/gpio/gpio-amd8111.c
drivers/gpio/gpio-ath79.c
drivers/gpio/gpio-bt8xx.c
drivers/gpio/gpio-cs5535.c
drivers/gpio/gpio-dwapb.c
drivers/gpio/gpio-exar.c
drivers/gpio/gpio-hisi.c [new file with mode: 0644]
drivers/gpio/gpio-mockup.c
drivers/gpio/gpio-msc313.c [new file with mode: 0644]
drivers/gpio/gpio-mvebu.c
drivers/gpio/gpio-mxc.c
drivers/gpio/gpio-mxs.c
drivers/gpio/gpio-omap.c
drivers/gpio/gpio-rcar.c
drivers/gpio/gpio-sifive.c
drivers/gpio/gpio-stmpe.c
drivers/gpio/gpio-tegra.c
drivers/gpio/gpio-tegra186.c
drivers/gpio/gpio-xilinx.c
drivers/gpio/gpio-xra1403.c
drivers/gpio/gpiolib-acpi.c
drivers/gpio/gpiolib-acpi.h
drivers/gpio/gpiolib-cdev.c
drivers/gpio/gpiolib-devres.c
drivers/gpio/gpiolib-of.c
drivers/gpio/gpiolib-sysfs.c
drivers/gpio/gpiolib.c
drivers/gpio/gpiolib.h
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
drivers/gpu/drm/amd/amdgpu/nv.c
drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
drivers/gpu/drm/amd/amdkfd/Kconfig
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.h
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_dccg.h
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_link_encoder.h
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
drivers/gpu/drm/amd/display/modules/color/color_gamma.c
drivers/gpu/drm/amd/include/atomfirmware.h
drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/inc/smu_types.h
drivers/gpu/drm/amd/pm/inc/smu_v11_0_7_ppsmc.h
drivers/gpu/drm/amd/pm/inc/smu_v11_5_ppsmc.h
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.h
drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/drm_blend.c
drivers/gpu/drm/drm_bufs.c
drivers/gpu/drm/drm_client.c
drivers/gpu/drm/drm_crtc.c
drivers/gpu/drm/drm_fb_helper.c
drivers/gpu/drm/drm_gem_shmem_helper.c
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/gpu/drm/i915/display/intel_dp_mst.c
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/i915_gem_mman.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/imx/dcss/dcss-dev.h
drivers/gpu/drm/imx/dcss/dcss-plane.c
drivers/gpu/drm/imx/dcss/dcss-scaler.c
drivers/gpu/drm/lima/lima_gem.c
drivers/gpu/drm/mcde/Kconfig
drivers/gpu/drm/mcde/Makefile
drivers/gpu/drm/mcde/mcde_clk_div.c [new file with mode: 0644]
drivers/gpu/drm/mcde/mcde_display.c
drivers/gpu/drm/mcde/mcde_display_regs.h
drivers/gpu/drm/mcde/mcde_drm.h
drivers/gpu/drm/mcde/mcde_drv.c
drivers/gpu/drm/meson/meson_dw_hdmi.c
drivers/gpu/drm/mgag200/mgag200_drv.c
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/mxsfb/mxsfb_drv.c
drivers/gpu/drm/nouveau/dispnv50/disp.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/omapdrm/omap_gem.c
drivers/gpu/drm/panel/panel-samsung-s6e63m0.c
drivers/gpu/drm/panel/panel-simple.c
drivers/gpu/drm/panfrost/panfrost_gem.c
drivers/gpu/drm/qxl/qxl_ttm.c
drivers/gpu/drm/radeon/radeon_uvd.c
drivers/gpu/drm/radeon/radeon_vce.c
drivers/gpu/drm/ttm/ttm_pool.c
drivers/gpu/drm/udl/udl_drv.c
drivers/gpu/drm/v3d/v3d_bo.c
drivers/gpu/drm/vc4/vc4_txp.c
drivers/gpu/drm/vgem/vgem_drv.c
drivers/gpu/drm/via/via_irq.c
drivers/gpu/drm/via/via_verifier.c
drivers/gpu/drm/virtio/virtgpu_debugfs.c
drivers/gpu/drm/virtio/virtgpu_drv.h
drivers/gpu/drm/virtio/virtgpu_fence.c
drivers/gpu/drm/virtio/virtgpu_ioctl.c
drivers/gpu/drm/virtio/virtgpu_object.c
drivers/gpu/drm/vkms/vkms_drv.c
drivers/gpu/drm/vkms/vkms_writeback.c
drivers/hsi/controllers/omap_ssi_core.c
drivers/hsi/hsi_core.c
drivers/hwmon/k10temp.c
drivers/i3c/master.c
drivers/i3c/master/Kconfig
drivers/i3c/master/Makefile
drivers/i3c/master/mipi-i3c-hci/Makefile [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/cmd.h [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/cmd_v1.c [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/cmd_v2.c [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/core.c [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/dat.h [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/dat_v1.c [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/dct.h [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/dct_v1.c [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/dma.c [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/ext_caps.c [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/ext_caps.h [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/hci.h [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/ibi.h [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/pio.c [new file with mode: 0644]
drivers/i3c/master/mipi-i3c-hci/xfer_mode_rate.h [new file with mode: 0644]
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v4.c
drivers/md/Kconfig
drivers/md/Makefile
drivers/md/dm-cache-target.c
drivers/md/dm-crypt.c
drivers/md/dm-ebs-target.c
drivers/md/dm-ioctl.c
drivers/md/dm-ps-historical-service-time.c [moved from drivers/md/dm-historical-service-time.c with 100% similarity]
drivers/md/dm-ps-io-affinity.c [new file with mode: 0644]
drivers/md/dm-ps-queue-length.c [moved from drivers/md/dm-queue-length.c with 100% similarity]
drivers/md/dm-ps-round-robin.c [moved from drivers/md/dm-round-robin.c with 100% similarity]
drivers/md/dm-ps-service-time.c [moved from drivers/md/dm-service-time.c with 100% similarity]
drivers/md/dm-stripe.c
drivers/md/dm-switch.c
drivers/md/dm-unstripe.c
drivers/md/dm-verity-target.c
drivers/md/dm-verity-verify-sig.c
drivers/md/dm-zero.c
drivers/md/dm.c
drivers/mfd/menelaus.c
drivers/mtd/ubi/build.c
drivers/mtd/ubi/io.c
drivers/net/virtio_net.c
drivers/pcmcia/Kconfig
drivers/pcmcia/Makefile
drivers/pcmcia/db1xxx_ss.c
drivers/pcmcia/electra_cf.c
drivers/pcmcia/omap_cf.c
drivers/pcmcia/vrc4173_cardu.c [deleted file]
drivers/pcmcia/vrc4173_cardu.h [deleted file]
drivers/platform/chrome/cros_ec_proto.c
drivers/platform/chrome/cros_ec_typec.c
drivers/power/reset/Kconfig
drivers/power/reset/Makefile
drivers/power/reset/ocelot-reset.c
drivers/power/reset/qnap-poweroff.c
drivers/power/reset/regulator-poweroff.c [new file with mode: 0644]
drivers/power/reset/syscon-poweroff.c
drivers/power/supply/ab8500_btemp.c
drivers/power/supply/ab8500_charger.c
drivers/power/supply/ab8500_fg.c
drivers/power/supply/abx500_chargalg.c
drivers/power/supply/axp20x_usb_power.c
drivers/power/supply/axp288_charger.c
drivers/power/supply/bq24190_charger.c
drivers/power/supply/bq24735-charger.c
drivers/power/supply/bq25890_charger.c
drivers/power/supply/collie_battery.c
drivers/power/supply/generic-adc-battery.c
drivers/power/supply/max17042_battery.c
drivers/power/supply/max8997_charger.c
drivers/power/supply/pm2301_charger.c
drivers/power/supply/power_supply_sysfs.c
drivers/power/supply/s3c_adc_battery.c
drivers/power/supply/wm831x_power.c
drivers/pwm/Kconfig
drivers/pwm/Makefile
drivers/pwm/core.c
drivers/pwm/pwm-ab8500.c
drivers/pwm/pwm-atmel-tcb.c
drivers/pwm/pwm-atmel.c
drivers/pwm/pwm-bcm-iproc.c
drivers/pwm/pwm-bcm-kona.c
drivers/pwm/pwm-bcm2835.c
drivers/pwm/pwm-berlin.c
drivers/pwm/pwm-brcmstb.c
drivers/pwm/pwm-clps711x.c
drivers/pwm/pwm-crc.c
drivers/pwm/pwm-dwc.c [new file with mode: 0644]
drivers/pwm/pwm-ep93xx.c
drivers/pwm/pwm-fsl-ftm.c
drivers/pwm/pwm-hibvt.c
drivers/pwm/pwm-img.c
drivers/pwm/pwm-imx-tpm.c
drivers/pwm/pwm-imx1.c
drivers/pwm/pwm-imx27.c
drivers/pwm/pwm-intel-lgm.c [new file with mode: 0644]
drivers/pwm/pwm-iqs620a.c
drivers/pwm/pwm-keembay.c [new file with mode: 0644]
drivers/pwm/pwm-lp3943.c
drivers/pwm/pwm-lpc18xx-sct.c
drivers/pwm/pwm-lpc32xx.c
drivers/pwm/pwm-lpss-platform.c
drivers/pwm/pwm-lpss.c
drivers/pwm/pwm-mediatek.c
drivers/pwm/pwm-meson.c
drivers/pwm/pwm-mtk-disp.c
drivers/pwm/pwm-pxa.c
drivers/pwm/pwm-rcar.c
drivers/pwm/pwm-renesas-tpu.c
drivers/pwm/pwm-rockchip.c
drivers/pwm/pwm-samsung.c
drivers/pwm/pwm-sifive.c
drivers/pwm/pwm-sl28cpld.c
drivers/pwm/pwm-spear.c
drivers/pwm/pwm-sti.c
drivers/pwm/pwm-sun4i.c
drivers/pwm/pwm-tegra.c
drivers/pwm/pwm-tiecap.c
drivers/pwm/pwm-tiehrpwm.c
drivers/pwm/pwm-vt8500.c
drivers/pwm/pwm-zx.c
drivers/rtc/Kconfig
drivers/rtc/class.c
drivers/rtc/nvmem.c
drivers/rtc/rtc-88pm80x.c
drivers/rtc/rtc-88pm860x.c
drivers/rtc/rtc-ab-b5ze-s3.c
drivers/rtc/rtc-ab-eoz9.c
drivers/rtc/rtc-ab3100.c
drivers/rtc/rtc-ab8500.c
drivers/rtc/rtc-abx80x.c
drivers/rtc/rtc-ac100.c
drivers/rtc/rtc-armada38x.c
drivers/rtc/rtc-aspeed.c
drivers/rtc/rtc-at91rm9200.c
drivers/rtc/rtc-at91sam9.c
drivers/rtc/rtc-au1xxx.c
drivers/rtc/rtc-bd70528.c
drivers/rtc/rtc-brcmstb-waketimer.c
drivers/rtc/rtc-cadence.c
drivers/rtc/rtc-cmos.c
drivers/rtc/rtc-coh901331.c
drivers/rtc/rtc-cpcap.c
drivers/rtc/rtc-cros-ec.c
drivers/rtc/rtc-da9052.c
drivers/rtc/rtc-da9063.c
drivers/rtc/rtc-davinci.c
drivers/rtc/rtc-digicolor.c
drivers/rtc/rtc-dm355evm.c
drivers/rtc/rtc-ds1305.c
drivers/rtc/rtc-ds1307.c
drivers/rtc/rtc-ds1343.c
drivers/rtc/rtc-ds1347.c
drivers/rtc/rtc-ds1374.c
drivers/rtc/rtc-ds1511.c
drivers/rtc/rtc-ds1553.c
drivers/rtc/rtc-ds1672.c
drivers/rtc/rtc-ds1685.c
drivers/rtc/rtc-ds1742.c
drivers/rtc/rtc-ds2404.c
drivers/rtc/rtc-ds3232.c
drivers/rtc/rtc-ep93xx.c
drivers/rtc/rtc-fsl-ftm-alarm.c
drivers/rtc/rtc-ftrtc010.c
drivers/rtc/rtc-goldfish.c
drivers/rtc/rtc-hym8563.c
drivers/rtc/rtc-imx-sc.c
drivers/rtc/rtc-imxdi.c
drivers/rtc/rtc-isl12026.c
drivers/rtc/rtc-isl1208.c
drivers/rtc/rtc-jz4740.c
drivers/rtc/rtc-lpc32xx.c
drivers/rtc/rtc-ls1x.c
drivers/rtc/rtc-m41t80.c
drivers/rtc/rtc-m48t59.c
drivers/rtc/rtc-m48t86.c
drivers/rtc/rtc-mc13xxx.c
drivers/rtc/rtc-meson-vrtc.c
drivers/rtc/rtc-meson.c
drivers/rtc/rtc-mpc5121.c
drivers/rtc/rtc-mrst.c
drivers/rtc/rtc-mt2712.c
drivers/rtc/rtc-mt6397.c
drivers/rtc/rtc-mv.c
drivers/rtc/rtc-mxc.c
drivers/rtc/rtc-mxc_v2.c
drivers/rtc/rtc-omap.c
drivers/rtc/rtc-pcap.c
drivers/rtc/rtc-pcf2123.c
drivers/rtc/rtc-pcf2127.c
drivers/rtc/rtc-pcf85063.c
drivers/rtc/rtc-pcf8523.c
drivers/rtc/rtc-pcf85363.c
drivers/rtc/rtc-pcf8563.c
drivers/rtc/rtc-pic32.c
drivers/rtc/rtc-pl030.c
drivers/rtc/rtc-pl031.c
drivers/rtc/rtc-pm8xxx.c
drivers/rtc/rtc-ps3.c
drivers/rtc/rtc-r9701.c
drivers/rtc/rtc-rc5t619.c
drivers/rtc/rtc-rk808.c
drivers/rtc/rtc-rp5c01.c
drivers/rtc/rtc-rs5c348.c
drivers/rtc/rtc-rv3028.c
drivers/rtc/rtc-rv3029c2.c
drivers/rtc/rtc-rv3032.c
drivers/rtc/rtc-rv8803.c
drivers/rtc/rtc-rx6110.c
drivers/rtc/rtc-rx8010.c
drivers/rtc/rtc-rx8581.c
drivers/rtc/rtc-s35390a.c
drivers/rtc/rtc-s3c.c
drivers/rtc/rtc-sa1100.c
drivers/rtc/rtc-sc27xx.c
drivers/rtc/rtc-sd3078.c
drivers/rtc/rtc-sh.c
drivers/rtc/rtc-sirfsoc.c
drivers/rtc/rtc-snvs.c
drivers/rtc/rtc-st-lpc.c
drivers/rtc/rtc-starfire.c
drivers/rtc/rtc-stk17ta8.c
drivers/rtc/rtc-stmp3xxx.c
drivers/rtc/rtc-sun4v.c
drivers/rtc/rtc-sun6i.c
drivers/rtc/rtc-sunxi.c
drivers/rtc/rtc-tegra.c
drivers/rtc/rtc-test.c
drivers/rtc/rtc-tps6586x.c
drivers/rtc/rtc-tps65910.c
drivers/rtc/rtc-tx4939.c
drivers/rtc/rtc-vr41xx.c
drivers/rtc/rtc-vt8500.c
drivers/rtc/rtc-wilco-ec.c
drivers/rtc/rtc-wm831x.c
drivers/rtc/rtc-xgene.c
drivers/rtc/rtc-zynqmp.c
drivers/rtc/sysfs.c
drivers/s390/cio/device.c
drivers/s390/crypto/zcrypt_cex2a.c
drivers/s390/crypto/zcrypt_cex4.c
drivers/staging/android/ashmem.c
drivers/thermal/intel/int340x_thermal/int3400_thermal.c
drivers/thermal/intel/int340x_thermal/int3403_thermal.c
drivers/vdpa/Kconfig
drivers/vdpa/ifcvf/ifcvf_main.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vdpa/vdpa.c
drivers/vdpa/vdpa_sim/Makefile
drivers/vdpa/vdpa_sim/vdpa_sim.c
drivers/vdpa/vdpa_sim/vdpa_sim.h [new file with mode: 0644]
drivers/vdpa/vdpa_sim/vdpa_sim_net.c [new file with mode: 0644]
drivers/vfio/virqfd.c
drivers/vhost/scsi.c
drivers/vhost/vdpa.c
drivers/video/fbdev/geode/lxfb_ops.c
drivers/video/fbdev/pm2fb.c
drivers/virtio/virtio_mem.c
drivers/virtio/virtio_ring.c
drivers/watchdog/Kconfig
drivers/watchdog/Makefile
drivers/watchdog/geodewdt.c
drivers/watchdog/hpwdt.c
drivers/watchdog/iTCO_wdt.c
drivers/watchdog/mpc8xxx_wdt.c
drivers/watchdog/pnx833x_wdt.c [deleted file]
drivers/watchdog/qcom-wdt.c
drivers/watchdog/rti_wdt.c
drivers/watchdog/sbc_fitpc2_wdt.c
drivers/watchdog/sp805_wdt.c
drivers/watchdog/sprd_wdt.c
drivers/watchdog/stm32_iwdg.c
drivers/watchdog/watchdog_core.c
drivers/watchdog/wdat_wdt.c
drivers/xen/Makefile
drivers/xen/manage.c
fs/9p/fid.c
fs/9p/fid.h
fs/9p/vfs_dentry.c
fs/9p/vfs_dir.c
fs/9p/vfs_file.c
fs/9p/vfs_inode.c
fs/9p/vfs_inode_dotl.c
fs/9p/vfs_super.c
fs/9p/xattr.c
fs/cifs/Kconfig
fs/cifs/Makefile
fs/cifs/cache.c
fs/cifs/cifs_debug.c
fs/cifs/cifs_dfs_ref.c
fs/cifs/cifs_fs_sb.h
fs/cifs/cifs_swn.c [new file with mode: 0644]
fs/cifs/cifs_swn.h [new file with mode: 0644]
fs/cifs/cifsacl.c
fs/cifs/cifsacl.h
fs/cifs/cifsencrypt.c
fs/cifs/cifsfs.c
fs/cifs/cifsfs.h
fs/cifs/cifsglob.h
fs/cifs/cifspdu.h
fs/cifs/cifsproto.h
fs/cifs/connect.c
fs/cifs/dfs_cache.c
fs/cifs/dfs_cache.h
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/fs_context.c
fs/cifs/fs_context.h
fs/cifs/fscache.c
fs/cifs/fscache.h
fs/cifs/inode.c
fs/cifs/misc.c
fs/cifs/netlink.c [new file with mode: 0644]
fs/cifs/netlink.h [new file with mode: 0644]
fs/cifs/readdir.c
fs/cifs/sess.c
fs/cifs/smb1ops.c
fs/cifs/smb2misc.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/smb2proto.h
fs/cifs/smbdirect.c
fs/cifs/trace.h
fs/cifs/transport.c
fs/cifs/unc.c [new file with mode: 0644]
fs/cifs/xattr.c
fs/configfs/dir.c
fs/eventfd.c
fs/eventpoll.c
fs/exfat/nls.c
fs/file.c
fs/gfs2/glock.c
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/inode.h
fs/gfs2/super.c
fs/gfs2/util.c
fs/gfs2/util.h
fs/jffs2/debug.h
fs/jffs2/jffs2_fs_sb.h
fs/jffs2/nodelist.h
fs/jffs2/readinode.c
fs/jffs2/super.c
fs/orangefs/file.c
fs/ubifs/auth.c
fs/ubifs/commit.c
fs/ubifs/debug.c
fs/ubifs/debug.h
fs/ubifs/dir.c
fs/ubifs/file.c
fs/ubifs/io.c
fs/ubifs/journal.c
fs/ubifs/lpt.c
fs/ubifs/master.c
fs/ubifs/orphan.c
fs/ubifs/recovery.c
fs/ubifs/replay.c
fs/ubifs/sb.c
fs/ubifs/scan.c
fs/ubifs/super.c
fs/ubifs/tnc.c
fs/ubifs/tnc_misc.c
fs/ubifs/ubifs.h
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap_btree.c
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc.h
fs/xfs/libxfs/xfs_ialloc_btree.c
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_refcount.c
fs/xfs/libxfs/xfs_rmap.c
fs/xfs/libxfs/xfs_rtbitmap.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/libxfs/xfs_sb.h
fs/xfs/libxfs/xfs_shared.h
fs/xfs/libxfs/xfs_types.c
fs/xfs/libxfs/xfs_types.h
fs/xfs/scrub/agheader_repair.c
fs/xfs/scrub/bmap.c
fs/xfs/scrub/common.c
fs/xfs/scrub/dir.c
fs/xfs/scrub/inode.c
fs/xfs/scrub/parent.c
fs/xfs/scrub/rtbitmap.c
fs/xfs/xfs_acl.c
fs/xfs/xfs_bmap_item.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_iops.c
fs/xfs/xfs_iops.h
fs/xfs/xfs_iwalk.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_refcount_item.c
fs/xfs/xfs_rmap_item.c
fs/xfs/xfs_rtalloc.c
fs/xfs/xfs_rtalloc.h
fs/xfs/xfs_super.c
fs/xfs/xfs_symlink.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_trans_dquot.c
include/acpi/cppc_acpi.h
include/asm-generic/io.h
include/drm/drm_fb_helper.h
include/drm/drm_gem_shmem_helper.h
include/drm/drm_modes.h
include/drm/drm_modeset_helper_vtables.h
include/dt-bindings/clock/at91.h
include/dt-bindings/clock/fsl,qoriq-clockgen.h [new file with mode: 0644]
include/dt-bindings/clock/g12a-clkc.h
include/dt-bindings/clock/k210-clk.h
include/dt-bindings/clock/qcom,camcc-sc7180.h [new file with mode: 0644]
include/dt-bindings/clock/qcom,gcc-sdx55.h [new file with mode: 0644]
include/dt-bindings/clock/qcom,rpmh.h
include/dt-bindings/clock/qcom,sm8250-lpass-aoncc.h [new file with mode: 0644]
include/dt-bindings/clock/qcom,sm8250-lpass-audiocc.h [new file with mode: 0644]
include/dt-bindings/clock/sifive-fu740-prci.h [new file with mode: 0644]
include/dt-bindings/gpio/tegra186-gpio.h
include/kvm/arm_pmu.h
include/kvm/arm_vgic.h
include/linux/clk-provider.h
include/linux/clk.h
include/linux/clk/samsung.h
include/linux/compat.h
include/linux/cpufreq.h
include/linux/dma-buf-map.h
include/linux/dma-map-ops.h
include/linux/eventfd.h
include/linux/gpio/consumer.h
include/linux/gpio/driver.h
include/linux/irqchip/arm-gic-v4.h
include/linux/kasan-checks.h
include/linux/kasan.h
include/linux/kvm_dirty_ring.h [new file with mode: 0644]
include/linux/kvm_host.h
include/linux/memcontrol.h
include/linux/mm.h
include/linux/mmdebug.h
include/linux/moduleloader.h
include/linux/page-flags-layout.h
include/linux/platform_data/cros_ec_commands.h
include/linux/power/generic-adc-battery.h
include/linux/psci.h
include/linux/pwm.h
include/linux/rtc.h
include/linux/s3c_adc_battery.h
include/linux/sched.h
include/linux/sched/cpufreq.h
include/linux/string.h
include/linux/syscalls.h
include/linux/vdpa.h
include/linux/wait.h
include/net/9p/client.h
include/trace/events/clk.h
include/trace/events/kvm.h
include/uapi/asm-generic/unistd.h
include/uapi/drm/drm_mode.h
include/uapi/linux/cifs/cifs_netlink.h [new file with mode: 0644]
include/uapi/linux/gpio.h
include/uapi/linux/kvm.h
include/uapi/linux/virtio_gpu.h
include/uapi/linux/virtio_ids.h
init/init_task.c
kernel/dma/Kconfig
kernel/dma/Makefile
kernel/dma/contiguous.c
kernel/dma/map_benchmark.c [new file with mode: 0644]
kernel/dma/mapping.c
kernel/dma/pool.c
kernel/fork.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/wait.c
kernel/sys_ni.c
lib/Kconfig
lib/Kconfig.debug
lib/Kconfig.kasan
lib/Makefile
lib/devmem_is_allowed.c [new file with mode: 0644]
lib/test_kasan.c
lib/test_kasan_module.c
mm/Kconfig
mm/filemap.c
mm/kasan/Makefile
mm/kasan/common.c
mm/kasan/generic.c
mm/kasan/generic_report.c [deleted file]
mm/kasan/hw_tags.c [new file with mode: 0644]
mm/kasan/init.c
mm/kasan/kasan.h
mm/kasan/quarantine.c
mm/kasan/report.c
mm/kasan/report_generic.c [new file with mode: 0644]
mm/kasan/report_hw_tags.c [new file with mode: 0644]
mm/kasan/report_sw_tags.c [moved from mm/kasan/tags_report.c with 78% similarity]
mm/kasan/shadow.c [new file with mode: 0644]
mm/kasan/sw_tags.c [moved from mm/kasan/tags.c with 88% similarity]
mm/memcontrol.c
mm/memory_hotplug.c
mm/mempool.c
mm/mmap.c
mm/page_alloc.c
mm/page_poison.c
mm/ptdump.c
mm/slab_common.c
mm/slub.c
mm/util.c
net/9p/client.c
scripts/Makefile.extrawarn
scripts/Makefile.lib
scripts/bloat-o-meter
scripts/config
scripts/diffconfig
scripts/genksyms/keywords.c
scripts/genksyms/lex.l
scripts/genksyms/parse.y
scripts/get_abi.pl
scripts/kconfig/conf.c
scripts/kconfig/confdata.c
scripts/kconfig/lexer.l
scripts/kconfig/lkc.h
scripts/kconfig/preprocess.c
scripts/kconfig/qconf-cfg.sh
scripts/kconfig/qconf.cc
scripts/kconfig/symbol.c
scripts/mod/modpost.c
scripts/mod/modpost.h
scripts/show_delta
scripts/sphinx-pre-install
scripts/split-man.pl
scripts/tracing/draw_functrace.py
sound/core/init.c
sound/core/memalloc.c
sound/core/oss/pcm_oss.c
sound/core/pcm_memory.c
sound/core/pcm_native.c
sound/pci/hda/patch_realtek.c
sound/usb/card.c
sound/usb/clock.c
sound/usb/implicit.c
sound/usb/quirks.c
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/disabled-features.h
tools/arch/x86/include/asm/msr-index.h
tools/build/feature/Makefile
tools/gpio/gpio-event-mon.c
tools/gpio/lsgpio.c
tools/include/linux/build_bug.h
tools/include/linux/compiler.h
tools/include/linux/compiler_types.h [new file with mode: 0644]
tools/include/linux/ctype.h
tools/include/linux/string.h
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/linux/const.h
tools/include/uapi/linux/fscrypt.h
tools/include/uapi/linux/perf_event.h
tools/include/uapi/linux/prctl.h
tools/include/uapi/linux/stat.h
tools/lib/string.c
tools/perf/Documentation/itrace.txt
tools/perf/Documentation/perf-config.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/arch/arm/util/cs-etm.c
tools/perf/arch/arm64/Makefile
tools/perf/arch/arm64/util/Build
tools/perf/arch/arm64/util/arm-spe.c
tools/perf/arch/arm64/util/arm64_exception_types.h [new file with mode: 0644]
tools/perf/arch/arm64/util/kvm-stat.c [new file with mode: 0644]
tools/perf/arch/arm64/util/mem-events.c [new file with mode: 0644]
tools/perf/arch/arm64/util/perf_regs.c
tools/perf/arch/mips/Build
tools/perf/arch/mips/annotate/instructions.c [new file with mode: 0644]
tools/perf/arch/x86/include/arch-tests.h
tools/perf/arch/x86/tests/Build
tools/perf/arch/x86/tests/arch-tests.c
tools/perf/arch/x86/tests/intel-cqm.c
tools/perf/arch/x86/util/intel-bts.c
tools/perf/arch/x86/util/intel-pt.c
tools/perf/builtin-annotate.c
tools/perf/builtin-c2c.c
tools/perf/builtin-diff.c
tools/perf/builtin-evlist.c
tools/perf/builtin-ftrace.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-kvm.c
tools/perf/builtin-mem.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/builtin-version.c
tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json [new file with mode: 0644]
tools/perf/pmu-events/arch/test/arch-std-events.json [new file with mode: 0644]
tools/perf/pmu-events/arch/test/test_cpu/cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/skylake/cache.json
tools/perf/pmu-events/arch/x86/skylake/floating-point.json
tools/perf/pmu-events/arch/x86/skylake/frontend.json
tools/perf/pmu-events/arch/x86/skylake/memory.json
tools/perf/pmu-events/arch/x86/skylake/other.json
tools/perf/pmu-events/arch/x86/skylake/pipeline.json
tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json
tools/perf/pmu-events/jevents.c
tools/perf/pmu-events/pmu-events.h
tools/perf/python/tracepoint.py
tools/perf/python/twatch.py
tools/perf/tests/Build
tools/perf/tests/backward-ring-buffer.c
tools/perf/tests/bpf.c
tools/perf/tests/builtin-test.c
tools/perf/tests/code-reading.c
tools/perf/tests/event-times.c
tools/perf/tests/event_update.c
tools/perf/tests/evsel-tp-sched.c
tools/perf/tests/expand-cgroup.c
tools/perf/tests/keep-tracking.c
tools/perf/tests/llvm.c
tools/perf/tests/mmap-basic.c
tools/perf/tests/openat-syscall-tp-fields.c
tools/perf/tests/parse-events.c
tools/perf/tests/parse-metric.c
tools/perf/tests/parse-no-sample-id-all.c
tools/perf/tests/perf-record.c
tools/perf/tests/perf-time-to-tsc.c [moved from tools/perf/arch/x86/tests/perf-time-to-tsc.c with 91% similarity]
tools/perf/tests/pmu-events.c
tools/perf/tests/sample-parsing.c
tools/perf/tests/shell/stat+shadow_stat.sh [new file with mode: 0755]
tools/perf/tests/shell/trace+probe_vfs_getname.sh
tools/perf/tests/sw-clock.c
tools/perf/tests/switch-tracking.c
tools/perf/tests/task-exit.c
tools/perf/tests/tests.h
tools/perf/tests/topology.c
tools/perf/tests/wp.c
tools/perf/trace/beauty/include/linux/socket.h
tools/perf/trace/beauty/mmap_flags.sh
tools/perf/trace/beauty/mmap_prot.sh
tools/perf/ui/browsers/hists.c
tools/perf/ui/gtk/gtk.h
tools/perf/ui/gtk/hists.c
tools/perf/util/annotate.c
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
tools/perf/util/arm-spe.c
tools/perf/util/auxtrace.c
tools/perf/util/auxtrace.h
tools/perf/util/bpf-event.c
tools/perf/util/bpf-loader.c
tools/perf/util/bpf-loader.h
tools/perf/util/build-id.c
tools/perf/util/build-id.h
tools/perf/util/cgroup.c
tools/perf/util/data.c
tools/perf/util/data.h
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/dso.c
tools/perf/util/dso.h
tools/perf/util/env.c
tools/perf/util/env.h
tools/perf/util/event.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/evswitch.c
tools/perf/util/expr.c
tools/perf/util/expr.h
tools/perf/util/expr.y
tools/perf/util/header.c
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/intel-pt.c
tools/perf/util/machine.c
tools/perf/util/map_symbol.h
tools/perf/util/mem-events.c
tools/perf/util/mem-events.h
tools/perf/util/mem2node.c
tools/perf/util/metricgroup.c
tools/perf/util/parse-events.c
tools/perf/util/parse-regs-options.c
tools/perf/util/perf_event_attr_fprintf.c
tools/perf/util/pmu.c
tools/perf/util/pmu.h
tools/perf/util/python.c
tools/perf/util/record.c
tools/perf/util/record.h
tools/perf/util/s390-cpumsf.c
tools/perf/util/s390-sample-raw.c
tools/perf/util/sample-raw.c
tools/perf/util/sample-raw.h
tools/perf/util/session.c
tools/perf/util/sideband_evlist.c
tools/perf/util/sort.c
tools/perf/util/sort.h
tools/perf/util/stat-display.c
tools/perf/util/stat.c
tools/perf/util/stat.h
tools/perf/util/symbol-elf.c
tools/perf/util/symbol.c
tools/perf/util/synthetic-events.c
tools/perf/util/unwind-libdw.c
tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
tools/testing/ktest/compare-ktest-sample.pl
tools/testing/ktest/ktest.pl
tools/testing/kunit/kunit.py
tools/testing/kunit/kunit_tool_test.py
tools/testing/selftests/arm64/mte/Makefile
tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_offload.py
tools/testing/selftests/core/close_range_test.c
tools/testing/selftests/dma/Makefile [new file with mode: 0644]
tools/testing/selftests/dma/config [new file with mode: 0644]
tools/testing/selftests/dma/dma_map_benchmark.c [new file with mode: 0644]
tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
tools/testing/selftests/kselftest/prefix.pl
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/aarch64/get-reg-list.c
tools/testing/selftests/kvm/dirty_log_perf_test.c
tools/testing/selftests/kvm/dirty_log_test.c
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/include/perf_test_util.h
tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/include/x86_64/vmx.h
tools/testing/selftests/kvm/lib/aarch64/processor.c
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/kvm_util_internal.h
tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/s390x/processor.c
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/s390x/sync_regs_test.c
tools/testing/selftests/kvm/set_memory_region_test.c
tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
tools/testing/selftests/kvm/x86_64/debug_regs.c
tools/testing/selftests/kvm/x86_64/evmcs_test.c
tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
tools/testing/selftests/kvm/x86_64/set_sregs_test.c
tools/testing/selftests/kvm/x86_64/smm_test.c
tools/testing/selftests/kvm/x86_64/state_test.c
tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
tools/testing/selftests/kvm/x86_64/user_msr_test.c [deleted file]
tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
tools/testing/selftests/net/devlink_port_split.py
tools/testing/selftests/tc-testing/tdc_batch.py
tools/testing/selftests/tc-testing/tdc_multibatch.py
tools/virtio/asm/barrier.h
tools/virtio/linux/bug.h
tools/virtio/linux/kernel.h
virt/kvm/coalesced_mmio.c
virt/kvm/dirty_ring.c [new file with mode: 0644]
virt/kvm/eventfd.c
virt/kvm/kvm_main.c

Documentation/ABI/testing/sysfs-devices-system-cpu
index 1a04ca8..0eee30b 100644
@@ -264,7 +264,8 @@ Description:        Discover CPUs in the same CPU frequency coordination domain
                attribute is useful for user space DVFS controllers to get better
                power/performance results for platforms using acpi-cpufreq.
 
-               This file is only present if the acpi-cpufreq driver is in use.
+               This file is only present if the acpi-cpufreq or the cppc-cpufreq
+               driver is in use.
 
 
 What:          /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
Documentation/admin-guide/device-mapper/verity.rst
index 8c50e5c..1a6b913 100644
@@ -134,7 +134,12 @@ root_hash_sig_key_desc <key_description>
     the pkcs7 signature of the roothash. The pkcs7 signature is used to validate
     the root hash during the creation of the device mapper block device.
     Verification of roothash depends on the config DM_VERITY_VERIFY_ROOTHASH_SIG
-    being set in the kernel.
+    being set in the kernel.  The signatures are checked against the builtin
+    trusted keyring by default, or the secondary trusted keyring if
+    DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING is set.  The secondary
+trusted keyring includes the builtin trusted keyring by default, and it can
+    also gain new certificates at run time if they are signed by a certificate
+    already in the secondary trusted keyring.
 
 Theory of operation
 ===================
Documentation/admin-guide/kernel-parameters.txt
index d243361..c722ec1 100644
                        for all guests.
                        Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
 
+       kvm-arm.mode=
+                       [KVM,ARM] Select one of KVM/arm64's modes of operation.
+
+                       protected: nVHE-based mode with support for guests whose
+                                  state is kept private from the host.
+                                  Not valid if the kernel is running in EL2.
+
+                       Defaults to VHE/nVHE based on hardware support and
+                       the value of CONFIG_ARM64_VHE.
+
        kvm-arm.vgic_v3_group0_trap=
                        [KVM,ARM] Trap guest accesses to GICv3 group-0
                        system registers
Documentation/admin-guide/perf-security.rst
index 1307b52..904e4eb 100644
@@ -84,11 +84,14 @@ capabilities then providing the process with CAP_PERFMON capability singly
 is recommended as the preferred secure approach to resolve double access
 denial logging related to usage of performance monitoring and observability.
 
-Unprivileged processes using perf_events system call are also subject
-for PTRACE_MODE_READ_REALCREDS ptrace access mode check [7]_ , whose
-outcome determines whether monitoring is permitted. So unprivileged
-processes provided with CAP_SYS_PTRACE capability are effectively
-permitted to pass the check.
+Prior to Linux v5.9, unprivileged processes using the perf_events system
+call were also subject to the PTRACE_MODE_READ_REALCREDS ptrace access
+mode check [7]_, whose outcome determined whether monitoring was
+permitted. So unprivileged processes provided with the CAP_SYS_PTRACE
+capability were effectively permitted to pass the check. Starting from
+Linux v5.9, CAP_SYS_PTRACE is no longer required and CAP_PERFMON is
+enough for processes to perform performance monitoring and observability
+operations.
 
 Other capabilities being granted to unprivileged processes can
 effectively enable capturing of additional data required for later
@@ -99,11 +102,11 @@ CAP_SYSLOG capability permits reading kernel space memory addresses from
 Privileged Perf users groups
 ---------------------------------
 
-Mechanisms of capabilities, privileged capability-dumb files [6]_ and
-file system ACLs [10]_ can be used to create dedicated groups of
-privileged Perf users who are permitted to execute performance monitoring
-and observability without scope limits. The following steps can be
-taken to create such groups of privileged Perf users.
+Capabilities, privileged capability-dumb files [6]_, file system
+ACLs [10]_ and the sudo [15]_ utility can be used to create dedicated
+groups of privileged Perf users who are permitted to perform performance
+monitoring and observability without limits. The following steps can be
+taken to create such groups of privileged Perf users.
 
 1. Create perf_users group of privileged Perf users, assign perf_users
    group to Perf tool executable and limit access to the executable for
@@ -133,7 +136,7 @@ taken to create such groups of privileged Perf users.
    # getcap perf
    perf = cap_sys_ptrace,cap_syslog,cap_perfmon+ep
 
-If the libcap installed doesn't yet support "cap_perfmon", use "38" instead,
+If the installed libcap [16]_ doesn't yet support "cap_perfmon", use "38" instead,
 i.e.:
 
 ::
@@ -159,6 +162,60 @@ performance monitoring and observability by using functionality of the
 configured Perf tool executable that, when executed, passes perf_events
 subsystem scope checks.
 
+If the Perf tool executable can't be assigned the required capabilities
+(e.g. the file system is mounted with the nosuid option or extended
+attributes are not supported by the file system), then a privileged
+capabilities environment, typically a shell, can be created instead.
+The shell provides its child processes with CAP_PERFMON and the other
+required capabilities so that performance monitoring and observability
+operations are available in the environment without limits. Access to
+the environment can be opened via the sudo utility for members of the
+perf_users group only. To create such an environment:
+
+1. Create a shell script that uses the capsh utility [16]_ to assign
+   CAP_PERFMON and the other required capabilities to the ambient
+   capability set of the shell process, lock the process security bits
+   after enabling the SECBIT_NO_SETUID_FIXUP, SECBIT_NOROOT and
+   SECBIT_NO_CAP_AMBIENT_RAISE bits, and then change the process identity
+   to the sudo caller of the script, who should be a member of the
+   perf_users group:
+
+::
+
+   # ls -alh /usr/local/bin/perf.shell
+   -rwxr-xr-x. 1 root root 83 Oct 13 23:57 /usr/local/bin/perf.shell
+   # cat /usr/local/bin/perf.shell
+   exec /usr/sbin/capsh --iab=^cap_perfmon --secbits=239 --user=$SUDO_USER -- -l
+
+2. Extend the sudo policy in the /etc/sudoers file with a rule for the
+   perf_users group:
+
+::
+
+   # grep perf_users /etc/sudoers
+   %perf_users    ALL=/usr/local/bin/perf.shell
+
+3. Check that members of the perf_users group have access to the
+   privileged shell and have CAP_PERFMON and the other required
+   capabilities enabled in the permitted, effective and ambient
+   capability sets of the resulting shell process:
+
+::
+
+  $ id
+  uid=1003(capsh_test) gid=1004(capsh_test) groups=1004(capsh_test),1000(perf_users) context=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
+  $ sudo perf.shell
+  [sudo] password for capsh_test:
+  $ grep Cap /proc/self/status
+  CapInh:        0000004000000000
+  CapPrm:        0000004000000000
+  CapEff:        0000004000000000
+  CapBnd:        000000ffffffffff
+  CapAmb:        0000004000000000
+  $ capsh --decode=0000004000000000
+  0x0000004000000000=cap_perfmon
+
+As a result, members of the perf_users group have access to the privileged
+environment, where they can use tools employing the performance monitoring
+APIs governed by the CAP_PERFMON Linux capability.
+
 This specific access control management is only available to superuser
 or root running processes with CAP_SETPCAP, CAP_SETFCAP [6]_
 capabilities.
@@ -264,3 +321,5 @@ Bibliography
 .. [12] `<http://man7.org/linux/man-pages/man5/limits.conf.5.html>`_
 .. [13] `<https://sites.google.com/site/fullycapable>`_
 .. [14] `<http://man7.org/linux/man-pages/man8/auditd.8.html>`_
+.. [15] `<https://man7.org/linux/man-pages/man8/sudo.8.html>`_
+.. [16] `<https://git.kernel.org/pub/scm/libs/libcap/libcap.git/>`_
Documentation/arm/memory.rst
index 0521b4c..0cb1e29 100644
@@ -45,9 +45,14 @@ fffe8000     fffeffff        DTCM mapping area for platforms with
 fffe0000       fffe7fff        ITCM mapping area for platforms with
                                ITCM mounted inside the CPU.
 
-ffc00000       ffefffff        Fixmap mapping region.  Addresses provided
+ffc80000       ffefffff        Fixmap mapping region.  Addresses provided
                                by fix_to_virt() will be located here.
 
+ffc00000       ffc7ffff        Guard region
+
+ff800000       ffbfffff        Permanent, fixed read-only mapping of the
+                               firmware provided DT blob
+
 fee00000       feffffff        Mapping of PCI I/O space. This is a static
                                mapping within the vmalloc space.
 
@@ -72,6 +77,11 @@ MODULES_VADDR        MODULES_END-1   Kernel module space
                                Kernel modules inserted via insmod are
                                placed here using dynamic mappings.
 
+TASK_SIZE      MODULES_VADDR-1 KASAN shadow memory when KASAN is in use.
+                               The range from MODULES_VADDR to the top
+                               of the memory is shadowed here with 1 bit
+                               per byte of memory.
+
 00001000       TASK_SIZE-1     User space mappings
                                Per-thread mappings are placed here via
                                the mmap() system call.
index e7522e5..901cd09 100644 (file)
@@ -97,7 +97,7 @@ hypervisor maps kernel pages in EL2 at a fixed (and potentially
 random) offset from the linear mapping. See the kern_hyp_va macro and
 kvm_update_va_mask function for more details. MMIO devices such as
 GICv2 gets mapped next to the HYP idmap page, as do vectors when
-ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs.
+ARM64_SPECTRE_V3A is enabled for particular CPUs.
 
 When using KVM with the Virtualization Host Extensions, no additional
 mappings are created, since the host kernel runs directly in EL2.
index 6b752a4..0fc3fb1 100644 (file)
@@ -4,13 +4,16 @@ The Kernel Address Sanitizer (KASAN)
 Overview
 --------
 
-KernelAddressSANitizer (KASAN) is a dynamic memory error detector designed to
-find out-of-bound and use-after-free bugs. KASAN has two modes: generic KASAN
-(similar to userspace ASan) and software tag-based KASAN (similar to userspace
-HWASan).
+KernelAddressSANitizer (KASAN) is a dynamic memory safety error detector
+designed to find out-of-bounds and use-after-free bugs. KASAN has three modes:
 
-KASAN uses compile-time instrumentation to insert validity checks before every
-memory access, and therefore requires a compiler version that supports that.
+1. generic KASAN (similar to userspace ASan),
+2. software tag-based KASAN (similar to userspace HWASan),
+3. hardware tag-based KASAN (based on hardware memory tagging).
+
+Software KASAN modes (1 and 2) use compile-time instrumentation to insert
+validity checks before every memory access, and therefore require a compiler
+version that supports that.
 
 Generic KASAN is supported in both GCC and Clang. With GCC it requires version
 8.3.0 or later. Any supported Clang version is compatible, but detection of
@@ -18,8 +21,8 @@ out-of-bounds accesses for global variables is only supported since Clang 11.
 
 Tag-based KASAN is only supported in Clang.
 
-Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and
-riscv architectures, and tag-based KASAN is supported only for arm64.
+Currently generic KASAN is supported for the x86_64, arm, arm64, xtensa, s390
+and riscv architectures, and tag-based KASAN modes are supported only for arm64.
 
 Usage
 -----
@@ -28,30 +31,22 @@ To enable KASAN configure kernel with::
 
          CONFIG_KASAN = y
 
-and choose between CONFIG_KASAN_GENERIC (to enable generic KASAN) and
-CONFIG_KASAN_SW_TAGS (to enable software tag-based KASAN).
+and choose between CONFIG_KASAN_GENERIC (to enable generic KASAN),
+CONFIG_KASAN_SW_TAGS (to enable software tag-based KASAN), and
+CONFIG_KASAN_HW_TAGS (to enable hardware tag-based KASAN).
+
+For software modes, you also need to choose between CONFIG_KASAN_OUTLINE and
+CONFIG_KASAN_INLINE. Outline and inline are compiler instrumentation types.
+The former produces a smaller binary while the latter is 1.1 - 2 times faster.
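+
+For example, a hypothetical configuration fragment that enables generic KASAN
+with inline instrumentation could look like::
+
+    CONFIG_KASAN=y
+    CONFIG_KASAN_GENERIC=y
+    CONFIG_KASAN_INLINE=y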
 
-You also need to choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE.
-Outline and inline are compiler instrumentation types. The former produces
-smaller binary while the latter is 1.1 - 2 times faster.
+Both software KASAN modes work with both SLUB and SLAB memory allocators,
+while the hardware tag-based KASAN currently only supports SLUB.
 
-Both KASAN modes work with both SLUB and SLAB memory allocators.
-For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
+For better error reports that include stack traces, enable CONFIG_STACKTRACE.
 
 To augment reports with last allocation and freeing stack of the physical page,
 it is recommended to enable also CONFIG_PAGE_OWNER and boot with page_owner=on.
 
-To disable instrumentation for specific files or directories, add a line
-similar to the following to the respective kernel Makefile:
-
-- For a single file (e.g. main.o)::
-
-    KASAN_SANITIZE_main.o := n
-
-- For all files in one directory::
-
-    KASAN_SANITIZE := n
-
 Error reports
 ~~~~~~~~~~~~~
 
@@ -136,22 +131,75 @@ freed (in case of a use-after-free bug report). Next comes a description of
 the accessed slab object and information about the accessed memory page.
 
 In the last section the report shows memory state around the accessed address.
-Reading this part requires some understanding of how KASAN works.
-
-The state of each 8 aligned bytes of memory is encoded in one shadow byte.
-Those 8 bytes can be accessible, partially accessible, freed or be a redzone.
-We use the following encoding for each shadow byte: 0 means that all 8 bytes
-of the corresponding memory region are accessible; number N (1 <= N <= 7) means
-that the first N bytes are accessible, and other (8 - N) bytes are not;
-any negative value indicates that the entire 8-byte word is inaccessible.
-We use different negative values to distinguish between different kinds of
-inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h).
+Internally KASAN tracks memory state separately for each memory granule, which
+is either 8 or 16 aligned bytes depending on the KASAN mode. Each number in
+the memory state section of the report shows the state of one of the memory
+granules that surround the accessed address.
+
+For generic KASAN the size of each memory granule is 8 bytes. The state of
+each granule is encoded in one shadow byte. Those 8 bytes can be accessible,
+partially accessible, freed, or be part of a redzone. KASAN uses the following
+encoding for each shadow byte: 0 means that all 8 bytes of the corresponding
+memory region are accessible; number N (1 <= N <= 7) means that the first N
+bytes are accessible, and other (8 - N) bytes are not; any negative value
+indicates that the entire 8-byte word is inaccessible. KASAN uses different
+negative values to distinguish between different kinds of inaccessible memory
+like redzones or freed memory (see mm/kasan/kasan.h).
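+
+For illustration, a hypothetical 13-byte kmalloc object spans two granules,
+so its shadow fragment could look like::
+
+    00 05 fc fc
+
+where 00 marks the fully accessible granule, 05 the partially accessible one
+(only the first 5 bytes are valid), and fc the redzone granules around the
+object.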
 
 In the report above the arrows point to the shadow byte 03, which means that
 the accessed address is partially accessible.
 
 For tag-based KASAN this last report section shows the memory tags around the
-accessed address (see Implementation details section).
+accessed address (see `Implementation details`_ section).
+
+Boot parameters
+~~~~~~~~~~~~~~~
+
+Hardware tag-based KASAN mode (see the section about different modes below) is
+intended for use in production as a security mitigation. Therefore it supports
+boot parameters that allow disabling KASAN completely or otherwise controlling
+particular KASAN features.
+
+The things that can be controlled are:
+
+1. Whether KASAN is enabled at all.
+2. Whether KASAN collects and saves alloc/free stacks.
+3. Whether KASAN panics on a detected bug or not.
+
+The ``kasan.mode`` boot parameter allows choosing one of three main modes:
+
+- ``kasan.mode=off`` - KASAN is disabled, no tag checks are performed
+- ``kasan.mode=prod`` - only essential production features are enabled
+- ``kasan.mode=full`` - all KASAN features are enabled
+
+The chosen mode provides default control values for the features mentioned
+above. However, it's also possible to override the default values by providing:
+
+- ``kasan.stacktrace=off`` or ``=on`` - enable alloc/free stack collection
+                                       (default: ``on`` for ``mode=full``,
+                                        otherwise ``off``)
+- ``kasan.fault=report`` or ``=panic`` - only print KASAN report or also panic
+                                        (default: ``report``)
+
+If ``kasan.mode`` parameter is not provided, it defaults to ``full`` when
+``CONFIG_DEBUG_KERNEL`` is enabled, and to ``prod`` otherwise.
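+
+For example, a hypothetical kernel command line for a production system that
+wants stack collection without panicking on detected bugs could include::
+
+    kasan.mode=prod kasan.stacktrace=on kasan.fault=report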
+
+For developers
+~~~~~~~~~~~~~~
+
+Software KASAN modes use compiler instrumentation to insert validity checks.
+Such instrumentation might be incompatible with some parts of the kernel, and
+therefore needs to be disabled for them. To disable instrumentation for
+specific files or directories, add a line similar to the following to the
+respective kernel Makefile:
+
+- For a single file (e.g. main.o)::
+
+    KASAN_SANITIZE_main.o := n
+
+- For all files in one directory::
+
+    KASAN_SANITIZE := n
 
 
 Implementation details
@@ -160,10 +208,10 @@ Implementation details
 Generic KASAN
 ~~~~~~~~~~~~~
 
-From a high level, our approach to memory error detection is similar to that
-of kmemcheck: use shadow memory to record whether each byte of memory is safe
-to access, and use compile-time instrumentation to insert checks of shadow
-memory on each memory access.
+From a high level perspective, KASAN's approach to memory error detection is
+similar to that of kmemcheck: use shadow memory to record whether each byte of
+memory is safe to access, and use compile-time instrumentation to insert checks
+of shadow memory on each memory access.
 
 Generic KASAN dedicates 1/8th of kernel memory to its shadow memory (e.g. 16TB
 to cover 128TB on x86_64) and uses direct mapping with a scale and offset to
@@ -194,20 +242,30 @@ Generic KASAN also reports the last 2 call stacks to creation of work that
 potentially has access to an object. Call stacks for the following are shown:
 call_rcu() and workqueue queuing.
 
+Generic KASAN is the only mode that delays the reuse of freed objects via
+quarantine (see mm/kasan/quarantine.c for implementation).
+
 Software tag-based KASAN
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-Tag-based KASAN uses the Top Byte Ignore (TBI) feature of modern arm64 CPUs to
-store a pointer tag in the top byte of kernel pointers. Like generic KASAN it
-uses shadow memory to store memory tags associated with each 16-byte memory
+Software tag-based KASAN requires software memory tagging support in the form
+of HWASan-like compiler instrumentation (see HWASan documentation for details).
+
+Software tag-based KASAN is currently only implemented for arm64 architecture.
+
+Software tag-based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs
+to store a pointer tag in the top byte of kernel pointers. Like generic KASAN
+it uses shadow memory to store memory tags associated with each 16-byte memory
 cell (therefore it dedicates 1/16th of the kernel memory for shadow memory).
 
-On each memory allocation tag-based KASAN generates a random tag, tags the
-allocated memory with this tag, and embeds this tag into the returned pointer.
+On each memory allocation software tag-based KASAN generates a random tag, tags
+the allocated memory with this tag, and embeds this tag into the returned
+pointer.
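+
+For illustration, a hypothetical tagged kernel pointer could look like
+0xF8FFFFC012345678: the top byte 0xF8 is the pointer tag, and the remaining
+bits form the usual kernel virtual address, which the CPU can dereference
+directly because TBI makes the hardware ignore the top byte.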
+
 Software tag-based KASAN uses compile-time instrumentation to insert checks
 before each memory access. These checks make sure that tag of the memory that
 is being accessed is equal to tag of the pointer that is used to access this
-memory. In case of a tag mismatch tag-based KASAN prints a bug report.
+memory. In case of a tag mismatch software tag-based KASAN prints a bug report.
 
 Software tag-based KASAN also has two instrumentation modes (outline, that
 emits callbacks to check memory accesses; and inline, that performs the shadow
@@ -216,9 +274,36 @@ simply printed from the function that performs the access check. With inline
 instrumentation a brk instruction is emitted by the compiler, and a dedicated
 brk handler is used to print bug reports.
 
-A potential expansion of this mode is a hardware tag-based mode, which would
-use hardware memory tagging support instead of compiler instrumentation and
-manual shadow memory manipulation.
+Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
+pointers with 0xFF pointer tag aren't checked). The value 0xFE is currently
+reserved to tag freed memory regions.
+
+Software tag-based KASAN currently only supports tagging of
+kmem_cache_alloc/kmalloc and page_alloc memory.
+
+Hardware tag-based KASAN
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Hardware tag-based KASAN is similar to the software mode in concept, but uses
+hardware memory tagging support instead of compiler instrumentation and
+shadow memory.
+
+Hardware tag-based KASAN is currently only implemented for the arm64
+architecture and is based on both the arm64 Memory Tagging Extension (MTE),
+introduced in the ARMv8.5 Instruction Set Architecture, and Top Byte Ignore
+(TBI).
+
+Special arm64 instructions are used to assign memory tags for each allocation.
+The same tags are assigned to pointers to those allocations. On every memory
+access, hardware makes sure that the tag of the memory that is being accessed
+is equal to the tag of the pointer that is used to access this memory. In case
+of a tag mismatch a fault is generated and a report is printed.
+
+Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
+pointers with 0xFF pointer tag aren't checked). The value 0xFE is currently
+reserved to tag freed memory regions.
+
+Hardware tag-based KASAN currently only supports tagging of
+kmem_cache_alloc/kmalloc and page_alloc memory.
 
 What memory accesses are sanitised by KASAN?
 --------------------------------------------
@@ -265,17 +350,17 @@ Most mappings in vmalloc space are small, requiring less than a full
 page of shadow space. Allocating a full shadow page per mapping would
 therefore be wasteful. Furthermore, to ensure that different mappings
 use different shadow pages, mappings would have to be aligned to
-``KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE``.
+``KASAN_GRANULE_SIZE * PAGE_SIZE``.
 
-Instead, we share backing space across multiple mappings. We allocate
+Instead, KASAN shares backing space across multiple mappings. It allocates
 a backing page when a mapping in vmalloc space uses a particular page
 of the shadow region. This page can be shared by other vmalloc
 mappings later on.
 
-We hook in to the vmap infrastructure to lazily clean up unused shadow
+KASAN hooks into the vmap infrastructure to lazily clean up unused shadow
 memory.
 
-To avoid the difficulties around swapping mappings around, we expect
+To avoid the difficulties around swapping mappings around, KASAN expects
 that the part of the shadow region that covers the vmalloc space will
 not be covered by the early shadow page, but will be left
 unmapped. This will require changes in arch-specific code.
@@ -286,24 +371,31 @@ architectures that do not have a fixed module region.
 CONFIG_KASAN_KUNIT_TEST & CONFIG_TEST_KASAN_MODULE
 --------------------------------------------------
 
-``CONFIG_KASAN_KUNIT_TEST`` utilizes the KUnit Test Framework for testing.
-This means each test focuses on a small unit of functionality and
-there are a few ways these tests can be run.
+KASAN tests consist of two parts:
+
+1. Tests that are integrated with the KUnit Test Framework. Enabled with
+``CONFIG_KASAN_KUNIT_TEST``. These tests can be run and partially verified
+automatically in a few different ways; see the instructions below.
 
-Each test will print the KASAN report if an error is detected and then
-print the number of the test and the status of the test:
+2. Tests that are currently incompatible with KUnit. Enabled with
+``CONFIG_TEST_KASAN_MODULE`` and can only be run as a module. These tests can
+only be verified manually, by loading the kernel module and inspecting the
+kernel log for KASAN reports.
 
-pass::
+Each KUnit-compatible KASAN test prints a KASAN report if an error is detected.
+Then the test prints its number and status.
+
+When a test passes::
 
         ok 28 - kmalloc_double_kzfree
 
-or, if kmalloc failed::
+When a test fails due to a failed ``kmalloc``::
 
         # kmalloc_large_oob_right: ASSERTION FAILED at lib/test_kasan.c:163
         Expected ptr is not null, but is
         not ok 4 - kmalloc_large_oob_right
 
-or, if a KASAN report was expected, but not found::
+When a test fails due to a missing KASAN report::
 
         # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:629
         Expected kasan_data->report_expected == kasan_data->report_found, but
@@ -311,46 +403,38 @@ or, if a KASAN report was expected, but not found::
         kasan_data->report_found == 0
         not ok 28 - kmalloc_double_kzfree
 
-All test statuses are tracked as they run and an overall status will
-be printed at the end::
+At the end the cumulative status of all KASAN tests is printed. On success::
 
         ok 1 - kasan
 
-or::
+Or, if one of the tests failed::
 
         not ok 1 - kasan
 
-(1) Loadable Module
-~~~~~~~~~~~~~~~~~~~~
+
+There are a few ways to run KUnit-compatible KASAN tests.
+
+1. Loadable module
+~~~~~~~~~~~~~~~~~~
 
 With ``CONFIG_KUNIT`` enabled, ``CONFIG_KASAN_KUNIT_TEST`` can be built as
-a loadable module and run on any architecture that supports KASAN
-using something like insmod or modprobe. The module is called ``test_kasan``.
+a loadable module and run on any architecture that supports KASAN by loading
+the module with insmod or modprobe. The module is called ``test_kasan``.
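+
+For example, assuming the module was built for the running kernel::
+
+        modprobe test_kasan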
 
-(2) Built-In
-~~~~~~~~~~~~~
+2. Built-In
+~~~~~~~~~~~
 
 With ``CONFIG_KUNIT`` built-in, ``CONFIG_KASAN_KUNIT_TEST`` can be built-in
-on any architecture that supports KASAN. These and any other KUnit
-tests enabled will run and print the results at boot as a late-init
-call.
+on any architecture that supports KASAN. These and any other KUnit tests enabled
+will run and print the results at boot as a late-init call.
 
-(3) Using kunit_tool
-~~~~~~~~~~~~~~~~~~~~~
+3. Using kunit_tool
+~~~~~~~~~~~~~~~~~~~
 
-With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, we can also
-use kunit_tool to see the results of these along with other KUnit
-tests in a more readable way. This will not print the KASAN reports
-of tests that passed. Use `KUnit documentation <https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html>`_ for more up-to-date
-information on kunit_tool.
+With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, it's also
+possible to use ``kunit_tool`` to see the results of these and other KUnit
+tests in a more readable way. This will not print the KASAN reports of the
+tests that passed. Use the `KUnit documentation <https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html>`_
+for more up-to-date information on ``kunit_tool``.
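+
+For example, a typical invocation from the kernel tree root (assuming a
+``.kunitconfig`` that enables KASAN) could be::
+
+        ./tools/testing/kunit/kunit.py run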
 
 .. _KUnit: https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html
-
-``CONFIG_TEST_KASAN_MODULE`` is a set of KASAN tests that could not be
-converted to KUnit. These tests can be run only as a module with
-``CONFIG_TEST_KASAN_MODULE`` built as a loadable module and
-``CONFIG_KASAN`` built-in. The type of error expected and the
-function being run is printed before the expression expected to give
-an error. Then the error is printed, if found, and that test
-should be interpreted to pass only if the error was the one expected
-by the test.
diff --git a/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml b/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml
new file mode 100644 (file)
index 0000000..0d06387
--- /dev/null
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/adi,axi-clkgen.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Binding for Analog Devices AXI clkgen pcore clock generator
+
+maintainers:
+  - Lars-Peter Clausen <lars@metafoo.de>
+  - Michael Hennerich <michael.hennerich@analog.com>
+
+description: |
+  The axi_clkgen IP core is a software programmable clock generator that can
+  be synthesized on various FPGA platforms.
+
+  Link: https://wiki.analog.com/resources/fpga/docs/axi_clkgen
+
+properties:
+  compatible:
+    enum:
+      - adi,axi-clkgen-2.00.a
+
+  clocks:
+    description:
+      Specifies the reference clock(s) from which the output frequency is
+      derived. This must either reference one clock if only the first clock
+      input is connected or two if both clock inputs are connected.
+    minItems: 1
+    maxItems: 2
+
+  '#clock-cells':
+    const: 0
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    clock-controller@ff000000 {
+      compatible = "adi,axi-clkgen-2.00.a";
+      #clock-cells = <0>;
+      reg = <0xff000000 0x1000>;
+      clocks = <&osc 1>;
+    };
diff --git a/Documentation/devicetree/bindings/clock/axi-clkgen.txt b/Documentation/devicetree/bindings/clock/axi-clkgen.txt
deleted file mode 100644 (file)
index aca94fe..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-Binding for the axi-clkgen clock generator
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-- compatible : shall be "adi,axi-clkgen-1.00.a" or "adi,axi-clkgen-2.00.a".
-- #clock-cells : from common clock binding; Should always be set to 0.
-- reg : Address and length of the axi-clkgen register set.
-- clocks : Phandle and clock specifier for the parent clock(s). This must
-       either reference one clock if only the first clock input is connected or two
-       if both clock inputs are connected. For the later case the clock connected
-       to the first input must be specified first.
-
-Optional properties:
-- clock-output-names : From common clock binding.
-
-Example:
-       clock@ff000000 {
-               compatible = "adi,axi-clkgen";
-               #clock-cells = <0>;
-               reg = <0xff000000 0x1000>;
-               clocks = <&osc 1>;
-       };
diff --git a/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml
new file mode 100644 (file)
index 0000000..565ca46
--- /dev/null
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/canaan,k210-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Canaan Kendryte K210 Clock Device Tree Bindings
+
+maintainers:
+  - Damien Le Moal <damien.lemoal@wdc.com>
+
+description: |
+  Canaan Kendryte K210 SoC clock controller bindings. The clock
+  controller node must be defined as a child node of the K210
+  system controller node.
+
+  See also:
+  - dt-bindings/clock/k210-clk.h
+
+properties:
+  compatible:
+    const: canaan,k210-clk
+
+  clocks:
+    description:
+      Phandle of the SoC 26MHz fixed-rate oscillator clock.
+
+  '#clock-cells':
+    const: 1
+
+required:
+  - compatible
+  - '#clock-cells'
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/k210-clk.h>
+    clocks {
+      in0: oscillator {
+        compatible = "fixed-clock";
+        #clock-cells = <0>;
+        clock-frequency = <26000000>;
+      };
+    };
+
+    /* ... */
+    sysclk: clock-controller {
+      #clock-cells = <1>;
+      compatible = "canaan,k210-clk";
+      clocks = <&in0>;
+    };
diff --git a/Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml b/Documentation/devicetree/bindings/clock/fsl,flexspi-clock.yaml
new file mode 100644 (file)
index 0000000..1fa390e
--- /dev/null
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/fsl,flexspi-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale FlexSPI clock driver for Layerscape SoCs
+
+maintainers:
+  - Michael Walle <michael@walle.cc>
+
+description:
+  The Freescale Layerscape SoCs have a special FlexSPI clock which is
+  derived from the platform PLL.
+
+properties:
+  compatible:
+    enum:
+      - fsl,ls1028a-flexspi-clk
+      - fsl,lx2160a-flexspi-clk
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  '#clock-cells':
+    const: 0
+
+  clock-output-names:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    dcfg {
+        #address-cells = <1>;
+        #size-cells = <1>;
+
+        fspi_clk: clock-controller@900 {
+            compatible = "fsl,ls1028a-flexspi-clk";
+            reg = <0x900 0x4>;
+            #clock-cells = <0>;
+            clocks = <&parentclk>;
+            clock-output-names = "fspi_clk";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml b/Documentation/devicetree/bindings/clock/qcom,aoncc-sm8250.yaml
new file mode 100644 (file)
index 0000000..c40a74b
--- /dev/null
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,aoncc-sm8250.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Clock bindings for LPASS Always ON Clock Controller on SM8250 SoCs
+
+maintainers:
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description: |
+  The clock consumer should specify the desired clock by having the clock
+  ID in its "clocks" phandle cell.
+  See include/dt-bindings/clock/qcom,sm8250-lpass-aoncc.h for the full list
+  of Audio Clock controller clock IDs.
+
+properties:
+  compatible:
+    const: qcom,sm8250-lpass-aon
+
+  reg:
+    maxItems: 1
+
+  '#clock-cells':
+    const: 1
+
+  clocks:
+    items:
+      - description: LPASS Core voting clock
+      - description: Glitch Free Mux register clock
+
+  clock-names:
+    items:
+      - const: core
+      - const: bus
+
+required:
+  - compatible
+  - reg
+  - '#clock-cells'
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,sm8250-lpass-aoncc.h>
+    #include <dt-bindings/sound/qcom,q6afe.h>
+    clock-controller@3800000 {
+      #clock-cells = <1>;
+      compatible = "qcom,sm8250-lpass-aon";
+      reg = <0x03380000 0x40000>;
+      clocks = <&q6afecc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+               <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>;
+      clock-names = "core", "bus";
+    };
diff --git a/Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml b/Documentation/devicetree/bindings/clock/qcom,audiocc-sm8250.yaml
new file mode 100644 (file)
index 0000000..915d762
--- /dev/null
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,audiocc-sm8250.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Clock bindings for LPASS Audio Clock Controller on SM8250 SoCs
+
+maintainers:
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description: |
+  The clock consumer should specify the desired clock by having the clock
+  ID in its "clocks" phandle cell.
+  See include/dt-bindings/clock/qcom,sm8250-lpass-audiocc.h for the full list
+  of Audio Clock controller clock IDs.
+
+properties:
+  compatible:
+    const: qcom,sm8250-lpass-audiocc
+
+  reg:
+    maxItems: 1
+
+  '#clock-cells':
+    const: 1
+
+  clocks:
+    items:
+      - description: LPASS Core voting clock
+      - description: Glitch Free Mux register clock
+
+  clock-names:
+    items:
+      - const: core
+      - const: bus
+
+required:
+  - compatible
+  - reg
+  - '#clock-cells'
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,sm8250-lpass-audiocc.h>
+    #include <dt-bindings/sound/qcom,q6afe.h>
+    clock-controller@3300000 {
+      #clock-cells = <1>;
+      compatible = "qcom,sm8250-lpass-audiocc";
+      reg = <0x03300000 0x30000>;
+      clocks = <&q6afecc LPASS_HW_MACRO_VOTE LPASS_CLK_ATTRIBUTE_COUPLE_NO>,
+               <&q6afecc LPASS_CLK_ID_TX_CORE_MCLK LPASS_CLK_ATTRIBUTE_COUPLE_NO>;
+      clock-names = "core", "bus";
+    };
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sdx55.yaml
new file mode 100644 (file)
index 0000000..1121b39
--- /dev/null
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,gcc-sdx55.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Global Clock & Reset Controller Binding for SDX55
+
+maintainers:
+  - Vinod Koul <vkoul@kernel.org>
+  - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+description: |
+  Qualcomm global clock control module which supports the clocks, resets and
+  power domains on SDX55
+
+  See also:
+  - dt-bindings/clock/qcom,gcc-sdx55.h
+
+properties:
+  compatible:
+    const: qcom,gcc-sdx55
+
+  clocks:
+    items:
+      - description: Board XO source
+      - description: Sleep clock source
+      - description: PLL test clock source (Optional clock)
+    minItems: 2
+    maxItems: 3
+
+  clock-names:
+    items:
+      - const: bi_tcxo
+      - const: sleep_clk
+      - const: core_bi_pll_test_se # Optional clock
+    minItems: 2
+    maxItems: 3
+
+  '#clock-cells':
+    const: 1
+
+  '#reset-cells':
+    const: 1
+
+  '#power-domain-cells':
+    const: 1
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - reg
+  - '#clock-cells'
+  - '#reset-cells'
+  - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    clock-controller@100000 {
+      compatible = "qcom,gcc-sdx55";
+      reg = <0x00100000 0x1f0000>;
+      clocks = <&rpmhcc RPMH_CXO_CLK>,
+               <&sleep_clk>, <&pll_test_clk>;
+      clock-names = "bi_tcxo", "sleep_clk", "core_bi_pll_test_se";
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+      #power-domain-cells = <1>;
+    };
+
+...
index a46a3a7..12c9cbc 100644 (file)
@@ -19,8 +19,10 @@ properties:
     enum:
       - qcom,sc7180-rpmh-clk
       - qcom,sdm845-rpmh-clk
+      - qcom,sdx55-rpmh-clk
       - qcom,sm8150-rpmh-clk
       - qcom,sm8250-rpmh-clk
+      - qcom,sm8350-rpmh-clk
 
   clocks:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-camcc.yaml
new file mode 100644 (file)
index 0000000..f49027e
--- /dev/null
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/qcom,sc7180-camcc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Camera Clock & Reset Controller Binding for SC7180
+
+maintainers:
+  - Taniya Das <tdas@codeaurora.org>
+
+description: |
+  Qualcomm camera clock control module which supports the clocks, resets and
+  power domains on SC7180.
+
+  See also:
+  - dt-bindings/clock/qcom,camcc-sc7180.h
+
+properties:
+  compatible:
+    const: qcom,sc7180-camcc
+
+  clocks:
+    items:
+      - description: Board XO source
+      - description: Camera_ahb clock from GCC
+      - description: Camera XO clock from GCC
+
+  clock-names:
+    items:
+      - const: bi_tcxo
+      - const: iface
+      - const: xo
+
+  '#clock-cells':
+    const: 1
+
+  '#reset-cells':
+    const: 1
+
+  '#power-domain-cells':
+    const: 1
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - '#clock-cells'
+  - '#reset-cells'
+  - '#power-domain-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-sc7180.h>
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    clock-controller@ad00000 {
+      compatible = "qcom,sc7180-camcc";
+      reg = <0x0ad00000 0x10000>;
+      clocks = <&rpmhcc RPMH_CXO_CLK>,
+               <&gcc GCC_CAMERA_AHB_CLK>,
+               <&gcc GCC_CAMERA_XO_CLK>;
+      clock-names = "bi_tcxo", "iface", "xo";
+      #clock-cells = <1>;
+      #reset-cells = <1>;
+      #power-domain-cells = <1>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt
deleted file mode 100644 (file)
index da92f57..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-* Renesas R-Car USB 2.0 clock selector
-
-This file provides information on what the device node for the R-Car USB 2.0
-clock selector.
-
-If you connect an external clock to the USB_EXTAL pin only, you should set
-the clock rate to "usb_extal" node only.
-If you connect an oscillator to both the USB_XTAL and USB_EXTAL, this module
-is not needed because this is default setting. (Of course, you can set the
-clock rates to both "usb_extal" and "usb_xtal" nodes.
-
-Case 1: An external clock connects to R-Car SoC
-       +----------+   +--- R-Car ---------------------+
-       |External  |---|USB_EXTAL ---> all usb channels|
-       |clock     |   |USB_XTAL                       |
-       +----------+   +-------------------------------+
-In this case, we need this driver with "usb_extal" clock.
-
-Case 2: An oscillator connects to R-Car SoC
-       +----------+   +--- R-Car ---------------------+
-       |Oscillator|---|USB_EXTAL -+-> all usb channels|
-       |          |---|USB_XTAL --+                   |
-       +----------+   +-------------------------------+
-In this case, we don't need this selector.
-
-Required properties:
-- compatible: "renesas,r8a7795-rcar-usb2-clock-sel" if the device is a part of
-             an R8A7795 SoC.
-             "renesas,r8a7796-rcar-usb2-clock-sel" if the device if a part of
-             an R8A77960 SoC.
-             "renesas,r8a77961-rcar-usb2-clock-sel" if the device if a part of
-             an R8A77961 SoC.
-             "renesas,rcar-gen3-usb2-clock-sel" for a generic R-Car Gen3
-             compatible device.
-
-             When compatible with the generic version, nodes must list the
-             SoC-specific version corresponding to the platform first
-             followed by the generic version.
-
-- reg: offset and length of the USB 2.0 clock selector register block.
-- clocks: A list of phandles and specifier pairs.
-- clock-names: Name of the clocks.
- - The functional clock of USB 2.0 host side must be "ehci_ohci"
- - The functional clock of HS-USB side must be "hs-usb-if"
- - The USB_EXTAL clock pin must be "usb_extal"
- - The USB_XTAL clock pin must be "usb_xtal"
-- #clock-cells: Must be 0
-- power-domains: A phandle and symbolic PM domain specifier.
-                 See power/renesas,rcar-sysc.yaml.
-- resets: A list of phandles and specifier pairs.
-- reset-names: Name of the resets.
- - The reset of USB 2.0 host side must be "ehci_ohci"
- - The reset of HS-USB side must be "hs-usb-if"
-
-Example (R-Car H3):
-
-       usb2_clksel: clock-controller@e6590630 {
-               compatible = "renesas,r8a7795-rcar-usb2-clock-sel",
-                            "renesas,rcar-gen3-usb2-clock-sel";
-               reg = <0 0xe6590630 0 0x02>;
-               clocks = <&cpg CPG_MOD 703>, <&cpg CPG_MOD 704>,
-                        <&usb_extal>, <&usb_xtal>;
-               clock-names = "ehci_ohci", "hs-usb-if", "usb_extal", "usb_xtal";
-               #clock-cells = <0>;
-               power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
-               resets = <&cpg 703>, <&cpg 704>;
-               reset-names = "ehci_ohci", "hs-usb-if";
-       };
diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.yaml
new file mode 100644 (file)
index 0000000..5be1229
--- /dev/null
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/clock/renesas,rcar-usb2-clock-sel.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Renesas R-Car USB 2.0 clock selector
+
+maintainers:
+  - Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+
+description: |
+  If you connect an external clock to the USB_EXTAL pin only, you should set
+  the clock rate on the "usb_extal" node only.
+  If you connect an oscillator to both USB_XTAL and USB_EXTAL, this module
+  is not needed because this is the default setting. (Of course, you can set
+  the clock rates on both the "usb_extal" and "usb_xtal" nodes.)
+
+  Case 1: An external clock connects to R-Car SoC
+    +----------+   +--- R-Car ---------------------+
+    |External  |---|USB_EXTAL ---> all usb channels|
+    |clock     |   |USB_XTAL                       |
+    +----------+   +-------------------------------+
+
+  In this case, we need this driver with the "usb_extal" clock.
+
+  Case 2: An oscillator connects to R-Car SoC
+    +----------+   +--- R-Car ---------------------+
+    |Oscillator|---|USB_EXTAL -+-> all usb channels|
+    |          |---|USB_XTAL --+                   |
+    +----------+   +-------------------------------+
+  In this case, we don't need this selector.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,r8a7795-rcar-usb2-clock-sel  # R-Car H3
+          - renesas,r8a7796-rcar-usb2-clock-sel  # R-Car M3-W
+          - renesas,r8a77961-rcar-usb2-clock-sel # R-Car M3-W+
+      - const: renesas,rcar-gen3-usb2-clock-sel
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 4
+    maxItems: 4
+
+  clock-names:
+    items:
+      - const: ehci_ohci
+      - const: hs-usb-if
+      - const: usb_extal
+      - const: usb_xtal
+
+  '#clock-cells':
+    const: 0
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    minItems: 2
+    maxItems: 2
+
+  reset-names:
+    items:
+      - const: ehci_ohci
+      - const: hs-usb-if
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - '#clock-cells'
+  - power-domains
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7795-cpg-mssr.h>
+    #include <dt-bindings/power/r8a7795-sysc.h>
+
+    usb2_clksel: clock-controller@e6590630 {
+        compatible = "renesas,r8a7795-rcar-usb2-clock-sel",
+                     "renesas,rcar-gen3-usb2-clock-sel";
+        reg = <0xe6590630 0x02>;
+        clocks = <&cpg CPG_MOD 703>, <&cpg CPG_MOD 704>,
+                 <&usb_extal>, <&usb_xtal>;
+        clock-names = "ehci_ohci", "hs-usb-if", "usb_extal", "usb_xtal";
+        #clock-cells = <0>;
+        power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
+        resets = <&cpg 703>, <&cpg 704>;
+        reset-names = "ehci_ohci", "hs-usb-if";
+    };
diff --git a/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml b/Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml
new file mode 100644 (file)
index 0000000..e17143c
--- /dev/null
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 SiFive, Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/sifive/fu740-prci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive FU740 Power Reset Clock Interrupt Controller (PRCI)
+
+maintainers:
+  - Zong Li <zong.li@sifive.com>
+  - Paul Walmsley <paul.walmsley@sifive.com>
+
+description:
+  On the FU740 family of SoCs, most system-wide clock and reset integration
+  is via the PRCI IP block.
+  The clock consumer should specify the desired clock via the clock ID
+  macros defined in include/dt-bindings/clock/sifive-fu740-prci.h.
+  These macros begin with PRCI_CLK_.
+
+  The hfclk and rtcclk nodes are required, and represent physical
+  crystals or resonators located on the PCB.  These nodes should be present
+  underneath /, rather than /soc.
+
+properties:
+  compatible:
+    const: sifive,fu740-c000-prci
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: high frequency clock.
+      - description: RTC clock.
+
+  clock-names:
+    items:
+      - const: hfclk
+      - const: rtcclk
+
+  "#clock-cells":
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    prci: clock-controller@10000000 {
+      compatible = "sifive,fu740-c000-prci";
+      reg = <0x10000000 0x1000>;
+      clocks = <&hfclk>, <&rtcclk>;
+      #clock-cells = <1>;
+    };
index f9750b0..27fffaf 100644 (file)
@@ -159,6 +159,8 @@ properties:
       - innolux,g121x1-l03
         # Innolux Corporation 11.6" WXGA (1366x768) TFT LCD panel
       - innolux,n116bge
+        # InnoLux 13.3" FHD (1920x1080) eDP TFT LCD panel
+      - innolux,n125hce-gn1
         # InnoLux 15.6" WXGA TFT LCD panel
       - innolux,n156bge-l21
         # Innolux Corporation 7.0" WSVGA (1024x600) TFT LCD panel
index 183ec23..f5ee23c 100644 (file)
@@ -48,6 +48,7 @@ properties:
       - nxp,pcal6416
       - nxp,pcal6524
       - nxp,pcal9535
+      - nxp,pcal9554b
       - nxp,pcal9555a
       - onnn,cat9554
       - onnn,pca9654
index 08eed23..e506f30 100644 (file)
@@ -13,6 +13,7 @@ Required properties:
 - gpio-controller : Marks the device node as a GPIO controller.
 
 Optional properties:
+- clocks : Input clock specifier. Refer to common clock bindings.
 - interrupts : Interrupt mapping for GPIO IRQ.
 - xlnx,all-inputs : if n-th bit is setup, GPIO-n is input
 - xlnx,dout-default : if n-th bit is 1, GPIO-n default value is 1
@@ -29,6 +30,7 @@ Example:
 gpio: gpio@40000000 {
        #gpio-cells = <2>;
        compatible = "xlnx,xps-gpio-1.00.a";
+       clocks = <&clkc25>;
        gpio-controller ;
        interrupt-parent = <&microblaze_0_intc>;
        interrupts = < 6 2 >;
diff --git a/Documentation/devicetree/bindings/gpio/mediatek,mt7621-gpio.txt b/Documentation/devicetree/bindings/gpio/mediatek,mt7621-gpio.txt
deleted file mode 100644 (file)
index e1c49b6..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-Mediatek MT7621 SoC GPIO controller bindings
-
-The IP core used inside these SoCs has 3 banks of 32 GPIOs each.
-The registers of all the banks are interwoven inside one single IO range.
-We load one GPIO controller instance per bank. Also the GPIO controller can receive
-interrupts on any of the GPIOs, either edge or level. It then interrupts the CPU
-using GIC INT12.
-
-Required properties for the top level node:
-- #gpio-cells : Should be two. The first cell is the GPIO pin number and the
-   second cell specifies GPIO flags, as defined in <dt-bindings/gpio/gpio.h>.
-   Only the GPIO_ACTIVE_HIGH and GPIO_ACTIVE_LOW flags are supported.
-- #interrupt-cells : Specifies the number of cells needed to encode an
-   interrupt. Should be 2. The first cell defines the interrupt number,
-   the second encodes the trigger flags encoded as described in
-   Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
-- compatible:
-  - "mediatek,mt7621-gpio" for Mediatek controllers
-- reg : Physical base address and length of the controller's registers
-- interrupt-parent : phandle of the parent interrupt controller.
-- interrupts : Interrupt specifier for the controllers interrupt.
-- interrupt-controller : Mark the device node as an interrupt controller.
-- gpio-controller : Marks the device node as a GPIO controller.
-
-Example:
-       gpio@600 {
-               #gpio-cells = <2>;
-               #interrupt-cells = <2>;
-               compatible = "mediatek,mt7621-gpio";
-               gpio-controller;
-               interrupt-controller;
-               reg = <0x600 0x100>;
-               interrupt-parent = <&gic>;
-               interrupts = <GIC_SHARED 12 IRQ_TYPE_LEVEL_HIGH>;
-       };
diff --git a/Documentation/devicetree/bindings/gpio/mediatek,mt7621-gpio.yaml b/Documentation/devicetree/bindings/gpio/mediatek,mt7621-gpio.yaml
new file mode 100644 (file)
index 0000000..5bbb2a3
--- /dev/null
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/mediatek,mt7621-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek MT7621 SoC GPIO controller
+
+maintainers:
+  - Sergio Paracuellos <sergio.paracuellos@gmail.com>
+
+description: |
+  The IP core used inside these SoCs has 3 banks of 32 GPIOs each.
+  The registers of all the banks are interwoven inside one single IO range.
+  We load one GPIO controller instance per bank. Also the GPIO controller can receive
+  interrupts on any of the GPIOs, either edge or level. It then interrupts the CPU
+  using GIC INT12.
+
+properties:
+  $nodename:
+    pattern: "^gpio@[0-9a-f]+$"
+
+  compatible:
+    const: mediatek,mt7621-gpio
+
+  reg:
+    maxItems: 1
+
+  "#gpio-cells":
+    const: 2
+
+  gpio-controller: true
+  gpio-ranges: true
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 2
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - "#gpio-cells"
+  - gpio-controller
+  - gpio-ranges
+  - interrupt-controller
+  - "#interrupt-cells"
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/mips-gic.h>
+
+    gpio@600 {
+      compatible = "mediatek,mt7621-gpio";
+      reg = <0x600 0x100>;
+      #gpio-cells = <2>;
+      gpio-controller;
+      gpio-ranges = <&pinctrl 0 0 95>;
+      interrupt-controller;
+      #interrupt-cells = <2>;
+      interrupt-parent = <&gic>;
+      interrupts = <GIC_SHARED 12 IRQ_TYPE_LEVEL_HIGH>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/gpio/mstar,msc313-gpio.yaml b/Documentation/devicetree/bindings/gpio/mstar,msc313-gpio.yaml
new file mode 100644 (file)
index 0000000..1f2ef40
--- /dev/null
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/mstar,msc313-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MStar/SigmaStar GPIO controller
+
+maintainers:
+  - Daniel Palmer <daniel@thingy.jp>
+
+properties:
+  $nodename:
+    pattern: "^gpio@[0-9a-f]+$"
+
+  compatible:
+    const: mstar,msc313-gpio
+
+  reg:
+    maxItems: 1
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+
+  gpio-ranges: true
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 2
+
+required:
+  - compatible
+  - reg
+  - gpio-controller
+  - "#gpio-cells"
+  - interrupt-controller
+  - "#interrupt-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/msc313-gpio.h>
+
+    gpio: gpio@207800 {
+      compatible = "mstar,msc313e-gpio";
+      #gpio-cells = <2>;
+      reg = <0x207800 0x200>;
+      gpio-controller;
+      gpio-ranges = <&pinctrl 0 36 22>,
+                    <&pinctrl 22 63 4>,
+                    <&pinctrl 26 68 6>;
+      #interrupt-cells = <2>;
+      interrupt-controller;
+      interrupt-parent = <&intc_fiq>;
+    };
diff --git a/Documentation/devicetree/bindings/i3c/mipi-i3c-hci.yaml b/Documentation/devicetree/bindings/i3c/mipi-i3c-hci.yaml
new file mode 100644 (file)
index 0000000..07a7b10
--- /dev/null
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/i3c/mipi-i3c-hci.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: MIPI I3C HCI Device Tree Bindings
+
+maintainers:
+  - Nicolas Pitre <npitre@baylibre.com>
+
+description: |
+  MIPI I3C Host Controller Interface
+
+  The MIPI I3C HCI (Host Controller Interface) specification defines
+  a common software driver interface to support compliant MIPI I3C
+  host controller hardware implementations from multiple vendors.
+
+  The hardware advertises its own implementation capabilities,
+  including the spec version it is based on, so there
+  isn't much to describe here (yet).
+
+  For details, please see:
+  https://www.mipi.org/specifications/i3c-hci
+
+properties:
+  compatible:
+    const: mipi-i3c-hci
+  reg:
+    maxItems: 1
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    i3c@a0000000 {
+      compatible = "mipi-i3c-hci";
+      reg = <0xa0000000 0x2000>;
+      interrupts = <89>;
+    };
index 4d530d8..c5de7b5 100644 (file)
@@ -7,7 +7,9 @@ The reset registers are both present in the MSCC vcoreiii MIPS and
 microchip Sparx5 armv8 SoC's.
 
 Required Properties:
- - compatible: "mscc,ocelot-chip-reset" or "microchip,sparx5-chip-reset"
+
+ - compatible: "mscc,ocelot-chip-reset", "mscc,luton-chip-reset",
+   "mscc,jaguar2-chip-reset" or "microchip,sparx5-chip-reset"
 
 Example:
        reset@1070008 {
diff --git a/Documentation/devicetree/bindings/power/reset/regulator-poweroff.yaml b/Documentation/devicetree/bindings/power/reset/regulator-poweroff.yaml
new file mode 100644 (file)
index 0000000..03bd1fa
--- /dev/null
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/reset/regulator-poweroff.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Force-disable power regulator to turn the power off.
+
+maintainers:
+  - Michael Klein <michael@fossekall.de>
+
+description: |
+  When the power-off handler is called, a power regulator is disabled by
+  calling regulator_force_disable(). If the power is still on and the
+  CPU still running after a 3000ms delay, a warning is emitted.
+
+properties:
+  compatible:
+    const: "regulator-poweroff"
+
+  cpu-supply:
+    description:
+      regulator to disable on power-down
+
+required:
+  - compatible
+  - cpu-supply
+
+additionalProperties: false
+
+examples:
+  - |
+    regulator-poweroff {
+        compatible = "regulator-poweroff";
+        cpu-supply = <&reg_vcc1v2>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt
deleted file mode 100644 (file)
index 985fcc6..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-Atmel TCB PWM controller
-
-Required properties:
-- compatible: should be "atmel,tcb-pwm"
-- #pwm-cells: should be 3. See pwm.yaml in this directory for a description of
-  the cells format. The only third cell flag supported by this binding is
-  PWM_POLARITY_INVERTED.
-- tc-block: The Timer Counter block to use as a PWM chip.
-
-Example:
-
-pwm {
-       compatible = "atmel,tcb-pwm";
-       #pwm-cells = <3>;
-       tc-block = <1>;
-};
diff --git a/Documentation/devicetree/bindings/pwm/intel,keembay-pwm.yaml b/Documentation/devicetree/bindings/pwm/intel,keembay-pwm.yaml
new file mode 100644 (file)
index 0000000..ff6880a
--- /dev/null
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2020 Intel Corporation
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/intel,keembay-pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Keem Bay PWM Device Tree Bindings
+
+maintainers:
+  - Vijayakannan Ayyathurai <vijayakannan.ayyathurai@intel.com>
+
+allOf:
+  - $ref: pwm.yaml#
+
+properties:
+  compatible:
+    enum:
+      - intel,keembay-pwm
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  "#pwm-cells":
+    const: 2
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - '#pwm-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #define KEEM_BAY_A53_GPIO
+
+    pwm@203200a0 {
+        compatible = "intel,keembay-pwm";
+        reg = <0x203200a0 0xe8>;
+        clocks = <&scmi_clk KEEM_BAY_A53_GPIO>;
+        #pwm-cells = <2>;
+    };
diff --git a/Documentation/devicetree/bindings/pwm/intel,lgm-pwm.yaml b/Documentation/devicetree/bindings/pwm/intel,lgm-pwm.yaml
new file mode 100644 (file)
index 0000000..11a6065
--- /dev/null
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/intel,lgm-pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LGM SoC PWM fan controller
+
+maintainers:
+  - Rahul Tanwar <rtanwar@maxlinear.com>
+
+properties:
+  compatible:
+    const: intel,lgm-pwm
+
+  reg:
+    maxItems: 1
+
+  "#pwm-cells":
+    const: 2
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - resets
+
+additionalProperties: false
+
+examples:
+  - |
+    pwm: pwm@e0d00000 {
+        compatible = "intel,lgm-pwm";
+        reg = <0xe0d00000 0x30>;
+        #pwm-cells = <2>;
+        clocks = <&cgu0 126>;
+        resets = <&rcu0 0x30 21>;
+    };
index 29adff5..25ed214 100644 (file)
@@ -7,6 +7,7 @@ Required properties:
    - "mediatek,mt7623-pwm": found on mt7623 SoC.
    - "mediatek,mt7628-pwm": found on mt7628 SoC.
    - "mediatek,mt7629-pwm": found on mt7629 SoC.
+   - "mediatek,mt8183-pwm": found on mt8183 SoC.
    - "mediatek,mt8516-pwm": found on mt8516 SoC.
  - reg: physical base address and length of the controller's registers.
  - #pwm-cells: must be 2. See pwm.yaml in this directory for a description of
index 0521957..902b271 100644 (file)
@@ -4,6 +4,7 @@ Required properties:
  - compatible: should be "mediatek,<name>-disp-pwm":
    - "mediatek,mt2701-disp-pwm": found on mt2701 SoC.
    - "mediatek,mt6595-disp-pwm": found on mt6595 SoC.
+   - "mediatek,mt8167-disp-pwm", "mediatek,mt8173-disp-pwm": found on mt8167 SoC.
    - "mediatek,mt8173-disp-pwm": found on mt8173 SoC.
  - reg: physical base address and length of the controller's registers.
  - #pwm-cells: must be 2. See pwm.yaml in this directory for a description of
index 8acd2de..d30dc04 100644 (file)
@@ -63,6 +63,11 @@ properties:
     description:
       Enables wake up of host system on alarm.
 
+  reset-source:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      The RTC is able to reset the machine.
+
 additionalProperties: true
 
 ...
index 55fffae..597d67f 100644 (file)
@@ -59,6 +59,7 @@ patternProperties:
         items:
           - enum:
               - atmel,tcb-timer
+              - atmel,tcb-pwm
               - microchip,tcb-capture
       reg:
         description:
@@ -68,10 +69,35 @@ patternProperties:
 
         minItems: 1
         maxItems: 3
+    required:
+      - compatible
+      - reg
+
+  "^pwm@[0-2]$":
+    description: The timer block channels that are used as PWMs.
+    $ref: ../../pwm/pwm.yaml#
+    type: object
+    properties:
+      compatible:
+        const: atmel,tcb-pwm
+      reg:
+        description:
+          TCB channel to use for this PWM.
+        enum: [ 0, 1, 2 ]
+
+      "#pwm-cells":
+        description:
+          The only third cell flag supported by this binding is
+          PWM_POLARITY_INVERTED.
+        const: 3
 
     required:
       - compatible
       - reg
+      - "#pwm-cells"
+
+    additionalProperties: false
+
 
 allOf:
   - if:
@@ -158,7 +184,13 @@ examples:
                         compatible = "atmel,tcb-timer";
                         reg = <1>;
                 };
-        };
+
+                pwm@2 {
+                        compatible = "atmel,tcb-pwm";
+                        reg = <2>;
+                        #pwm-cells = <3>;
+                };
+        };
     /* TCB0 Capture with QDEC: */
         timer@f800c000 {
                 compatible = "atmel,at91rm9200-tcb", "simple-mfd", "syscon";
index ffaf833..041ae90 100644 (file)
@@ -187,6 +187,8 @@ patternProperties:
     description: CALAO Systems SAS
   "^calxeda,.*":
     description: Calxeda
+  "^canaan,.*":
+    description: Canaan, Inc.
   "^caninos,.*":
     description: Caninos Loucos Program
   "^capella,.*":
index e8f2263..5ac607d 100644 (file)
@@ -22,6 +22,9 @@ properties:
           - const: allwinner,sun50i-a64-wdt
           - const: allwinner,sun6i-a31-wdt
       - items:
+          - const: allwinner,sun50i-a100-wdt
+          - const: allwinner,sun6i-a31-wdt
+      - items:
           - const: allwinner,sun50i-h6-wdt
           - const: allwinner,sun6i-a31-wdt
       - items:
index d9fc7bb..f7ee922 100644 (file)
@@ -14,7 +14,15 @@ maintainers:
 
 properties:
   compatible:
-    const: snps,dw-wdt
+    oneOf:
+      - const: snps,dw-wdt
+      - items:
+          - enum:
+              - rockchip,rk3066-wdt
+              - rockchip,rk3188-wdt
+              - rockchip,rk3288-wdt
+              - rockchip,rk3368-wdt
+          - const: snps,dw-wdt
 
   reg:
     maxItems: 1
index d6b2a19..a2133d6 100644 (file)
@@ -190,7 +190,7 @@ DMA Fence uABI/Sync File
 Indefinite DMA Fences
 ~~~~~~~~~~~~~~~~~~~~~
 
-At various times &dma_fence with an indefinite time until dma_fence_wait()
+At various times, struct dma_fence objects with an indefinite time until dma_fence_wait()
 finishes have been proposed. Examples include:
 
 * Future fences, used in HWC1 to signal when a buffer isn't used by the display
index bb67657..cd8b6e6 100644 (file)
@@ -411,6 +411,12 @@ RESET
   devm_reset_control_get()
   devm_reset_controller_register()
 
+RTC
+  devm_rtc_device_register()
+  devm_rtc_allocate_device()
+  devm_rtc_register_device()
+  devm_rtc_nvmem_register()
+
 SERDEV
   devm_serdev_device_open()
 
index 423492d..173e4c7 100644 (file)
@@ -440,18 +440,20 @@ For details refer to Documentation/firmware-guide/acpi/gpio-properties.rst
 
 Interacting With the Legacy GPIO Subsystem
 ==========================================
-Many kernel subsystems still handle GPIOs using the legacy integer-based
-interface. Although it is strongly encouraged to upgrade them to the safer
-descriptor-based API, the following two functions allow you to convert a GPIO
-descriptor into the GPIO integer namespace and vice-versa::
+Many kernel subsystems and drivers still handle GPIOs using the legacy
+integer-based interface. It is strongly recommended to update these to the new
+gpiod interface. For cases where both interfaces need to be used, the following
+two functions allow you to convert a GPIO descriptor into the GPIO integer namespace
+and vice-versa::
 
        int desc_to_gpio(const struct gpio_desc *desc)
        struct gpio_desc *gpio_to_desc(unsigned gpio)
 
-The GPIO number returned by desc_to_gpio() can be safely used as long as the
-GPIO descriptor has not been freed. All the same, a GPIO number passed to
-gpio_to_desc() must have been properly acquired, and usage of the returned GPIO
-descriptor is only possible after the GPIO number has been released.
+The GPIO number returned by desc_to_gpio() can safely be used as a parameter of
+the gpio\_*() functions for as long as the GPIO descriptor `desc` is not freed.
+All the same, a GPIO number passed to gpio_to_desc() must first be properly
+acquired using e.g. gpio_request_one(), and the returned GPIO descriptor is only
+considered valid until that GPIO number is released using gpio_free().
 
 Freeing a GPIO obtained by one API with the other API is forbidden and an
 unchecked error.
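
As a minimal sketch of such a round-trip (``dev`` and the "reset" consumer
name here are assumptions, not taken from the text above)::

       /* Acquire a descriptor, then derive its legacy number */
       struct gpio_desc *desc = gpiod_get(dev, "reset", GPIOD_OUT_LOW);
       int gpio;

       if (IS_ERR(desc))
               return PTR_ERR(desc);

       gpio = desc_to_gpio(desc);      /* valid while desc is held */

       /* Mapping back yields the same descriptor */
       WARN_ON(gpio_to_desc(gpio) != desc);

       gpiod_put(desc);                /* gpio is stale from here on */
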
index 072a745..0fb57e2 100644 (file)
@@ -416,7 +416,8 @@ The preferred way to set up the helpers is to fill in the
 struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip.
 If you do this, the additional irq_chip will be set up by gpiolib at the
 same time as setting up the rest of the GPIO functionality. The following
-is a typical example of a cascaded interrupt handler using gpio_irq_chip:
+is a typical example of a chained cascaded interrupt handler using
+the gpio_irq_chip:
 
 .. code-block:: c
 
@@ -452,7 +453,46 @@ is a typical example of a cascaded interrupt handler using gpio_irq_chip:
 
   return devm_gpiochip_add_data(dev, &g->gc, g);
 
-The helper support using hierarchical interrupt controllers as well.
+The helper supports using threaded interrupts as well. Then you just request
+the interrupt separately and go with it:
+
+.. code-block:: c
+
+  /* Typical state container with dynamic irqchip */
+  struct my_gpio {
+      struct gpio_chip gc;
+      struct irq_chip irq;
+  };
+
+  int irq; /* from platform etc */
+  struct my_gpio *g;
+  struct gpio_irq_chip *girq;
+
+  /* Set up the irqchip dynamically */
+  g->irq.name = "my_gpio_irq";
+  g->irq.irq_ack = my_gpio_ack_irq;
+  g->irq.irq_mask = my_gpio_mask_irq;
+  g->irq.irq_unmask = my_gpio_unmask_irq;
+  g->irq.irq_set_type = my_gpio_set_irq_type;
+
+  ret = devm_request_threaded_irq(dev, irq, NULL,
+               irq_thread_fn, IRQF_ONESHOT, "my-chip", g);
+  if (ret < 0)
+       return ret;
+
+  /* Get a pointer to the gpio_irq_chip */
+  girq = &g->gc.irq;
+  girq->chip = &g->irq;
+  /* This will let us handle the parent IRQ in the driver */
+  girq->parent_handler = NULL;
+  girq->num_parents = 0;
+  girq->parents = NULL;
+  girq->default_type = IRQ_TYPE_NONE;
+  girq->handler = handle_bad_irq;
+
+  return devm_gpiochip_add_data(dev, &g->gc, g);
+
+The helper supports using hierarchical interrupt controllers as well.
 In this case the typical set-up will look like this:
 
 .. code-block:: c
@@ -493,32 +533,13 @@ the parent hardware irq from a child (i.e. this gpio chip) hardware irq.
 As always it is good to look at examples in the kernel tree for advice
 on how to find the required pieces.
 
-The old way of adding irqchips to gpiochips after registration is also still
-available but we try to move away from this:
-
-- DEPRECATED: gpiochip_irqchip_add(): adds a chained cascaded irqchip to a
-  gpiochip. It will pass the struct gpio_chip* for the chip to all IRQ
-  callbacks, so the callbacks need to embed the gpio_chip in its state
-  container and obtain a pointer to the container using container_of().
-  (See Documentation/driver-api/driver-model/design-patterns.rst)
-
-- gpiochip_irqchip_add_nested(): adds a nested cascaded irqchip to a gpiochip,
-  as discussed above regarding different types of cascaded irqchips. The
-  cascaded irq has to be handled by a threaded interrupt handler.
-  Apart from that it works exactly like the chained irqchip.
-
-- gpiochip_set_nested_irqchip(): sets up a nested cascaded irq handler for a
-  gpio_chip from a parent IRQ. As the parent IRQ has usually been
-  explicitly requested by the driver, this does very little more than
-  mark all the child IRQs as having the other IRQ as parent.
-
 If there is a need to exclude certain GPIO lines from the IRQ domain handled by
 these helpers, we can set .irq.need_valid_mask of the gpiochip before
 devm_gpiochip_add_data() or gpiochip_add_data() is called. This allocates an
 .irq.valid_mask with as many bits set as there are GPIO lines in the chip, each
 bit representing line 0..n-1. Drivers can exclude GPIO lines by clearing bits
-from this mask. The mask must be filled in before gpiochip_irqchip_add() or
-gpiochip_irqchip_add_nested() is called.
+from this mask. The mask can be filled in using the init_valid_mask()
+callback that is part of the struct gpio_irq_chip.
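
A minimal sketch of such a callback (the excluded line number 3 and the
``my_gpio_init_valid_mask`` name are made up for illustration):

.. code-block:: c

  static void my_gpio_init_valid_mask(struct gpio_chip *gc,
                                      unsigned long *valid_mask,
                                      unsigned int ngpios)
  {
      /* Hypothetical: line 3 of this chip cannot generate IRQs */
      clear_bit(3, valid_mask);
  }

  /* Before [devm_]gpiochip_add_data(): */
  girq->init_valid_mask = my_gpio_init_valid_mask;
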
 
 To use the helpers please keep the following in mind:
 
index c3fe9b2..b2288dc 100644 (file)
@@ -8,7 +8,7 @@
     -----------------------
     |       alpha: | TODO |
     |         arc: | TODO |
-    |         arm: | TODO |
+    |         arm: |  ok  |
     |       arm64: |  ok  |
     |         c6x: | TODO |
     |        csky: | TODO |
index d9082b9..6fc03de 100644 (file)
@@ -23,7 +23,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ..  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ..  |
     |          sh: | TODO |
     |       sparc: |  ..  |
index 8d1ab58..1bc48a1 100644 (file)
@@ -1,53 +1,52 @@
 .. SPDX-License-Identifier: GPL-2.0
 
-==================
-Global File System
-==================
+====================
+Global File System 2
+====================
 
-https://fedorahosted.org/cluster/wiki/HomePage
-
-GFS is a cluster file system. It allows a cluster of computers to
+GFS2 is a cluster file system. It allows a cluster of computers to
 simultaneously use a block device that is shared between them (with FC,
-iSCSI, NBD, etc).  GFS reads and writes to the block device like a local
+iSCSI, NBD, etc).  GFS2 reads and writes to the block device like a local
 file system, but also uses a lock module to allow the computers to coordinate
 their I/O so file system consistency is maintained.  One of the nifty
-features of GFS is perfect consistency -- changes made to the file system
+features of GFS2 is perfect consistency -- changes made to the file system
 on one machine show up immediately on all other machines in the cluster.
 
-GFS uses interchangeable inter-node locking mechanisms, the currently
+GFS2 uses interchangeable inter-node locking mechanisms, the currently
 supported mechanisms are:
 
   lock_nolock
-    - allows gfs to be used as a local file system
+    - allows GFS2 to be used as a local file system
 
   lock_dlm
-    - uses a distributed lock manager (dlm) for inter-node locking.
+    - uses the distributed lock manager (dlm) for inter-node locking.
       The dlm is found at linux/fs/dlm/
 
-Lock_dlm depends on user space cluster management systems found
+lock_dlm depends on user space cluster management systems found
 at the URL above.
 
-To use gfs as a local file system, no external clustering systems are
+To use GFS2 as a local file system, no external clustering systems are
 needed, simply::
 
   $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device
   $ mount -t gfs2 /dev/block_device /dir
 
-If you are using Fedora, you need to install the gfs2-utils package
-and, for lock_dlm, you will also need to install the cman package
-and write a cluster.conf as per the documentation. For F17 and above
-cman has been replaced by the dlm package.
+The gfs2-utils package is required on all cluster nodes and, for lock_dlm, you
+will also need the dlm and corosync user space utilities configured as per the
+documentation.
+
+gfs2-utils can be found at https://pagure.io/gfs2-utils
 
 GFS2 is not on-disk compatible with previous versions of GFS, but it
 is pretty close.
 
-The following man pages can be found at the URL above:
+The following man pages are available from gfs2-utils:
 
   ============         =============================================
   fsck.gfs2            to repair a filesystem
   gfs2_grow            to expand a filesystem online
   gfs2_jadd            to add journals to a filesystem online
   tunegfs2             to manipulate, examine and tune a filesystem
-  gfs2_convert         to convert a gfs filesystem to gfs2 in-place
+  gfs2_convert         to convert a gfs filesystem to GFS2 in-place
   mkfs.gfs2            to make a filesystem
   ============         =============================================
index 1cf1aeb..226ae07 100644 (file)
@@ -553,6 +553,41 @@ with "depends on m".  E.g.::
 
 limits FOO to module (=m) or disabled (=n).
 
+Compile-testing
+~~~~~~~~~~~~~~~
+If a config symbol has a dependency, but the code controlled by the config
+symbol can still be compiled if the dependency is not met, it is encouraged to
+increase build coverage by adding an "|| COMPILE_TEST" clause to the
+dependency. This is especially useful for drivers for more exotic hardware, as
+it allows continuous-integration systems to compile-test the code on a more
+common system, and detect bugs that way.
+Note that compile-tested code should avoid crashing when run on a system where
+the dependency is not met.
+
+Architecture and platform dependencies
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Due to the presence of stubs, most drivers can now be compiled on most
+architectures. However, this does not mean it makes sense to have all drivers
+available everywhere, as the actual hardware may only exist on specific
+architectures and platforms. This is especially true for on-SoC IP cores,
+which may be limited to a specific vendor or SoC family.
+
+To prevent asking the user about drivers that cannot be used on the system(s)
+the user is compiling a kernel for, config symbols controlling the compilation
+of a driver should, where it makes sense, contain proper dependencies,
+limiting the visibility of the symbol to (a superset of) the platform(s) the
+driver can be used on. The dependency can be an architecture (e.g. ARM) or
+platform (e.g. ARCH_OMAP4) dependency. This makes life simpler not only for
+distro config owners, but also for every single developer or user who
+configures a kernel.
+
+Such a dependency can be relaxed by combining it with the compile-testing rule
+above, leading to:
+
+  config FOO
+       bool "Support for foo hardware"
+       depends on ARCH_FOO_VENDOR || COMPILE_TEST
+
 Kconfig recursive dependency limitations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
index 8b413ef..6163467 100644 (file)
@@ -97,7 +97,7 @@ Like Make, Kconfig provides several built-in functions. Every function takes a
 particular number of arguments.
 
 In Make, every built-in function takes at least one argument. Kconfig allows
-zero argument for built-in functions, such as $(fileno), $(lineno). You could
+zero arguments for built-in functions, such as $(filename), $(lineno). You could
 consider those as "built-in variables", but it is just a matter of how we call
 it after all. Let's say "built-in function" here to refer to natively supported
 functionality.
index 0d5dd54..d36768c 100644 (file)
@@ -15,13 +15,15 @@ This document describes the Linux kernel Makefiles.
           --- 3.4 Objects which export symbols
           --- 3.5 Library file goals - lib-y
           --- 3.6 Descending down in directories
-          --- 3.7 Compilation flags
-          --- 3.8 <deleted>
-          --- 3.9 Dependency tracking
-          --- 3.10 Special Rules
-          --- 3.11 $(CC) support functions
-          --- 3.12 $(LD) support functions
-          --- 3.13 Script Invocation
+          --- 3.7 Non-builtin vmlinux targets - extra-y
+          --- 3.8 Always built goals - always-y
+          --- 3.9 Compilation flags
+          --- 3.10 Dependency tracking
+          --- 3.11 Custom Rules
+          --- 3.12 Command change detection
+          --- 3.13 $(CC) support functions
+          --- 3.14 $(LD) support functions
+          --- 3.15 Script Invocation
 
        === 4 Host Program support
           --- 4.1 Simple Host Program
@@ -46,7 +48,7 @@ This document describes the Linux kernel Makefiles.
           --- 7.5 Architecture-specific boot images
           --- 7.6 Building non-kbuild targets
           --- 7.7 Commands useful for building a boot image
-          --- 7.8 Custom kbuild commands
+          --- 7.8 <deleted>
           --- 7.9 Preprocessing linker scripts
           --- 7.10 Generic header files
           --- 7.11 Post-link pass
@@ -67,11 +69,11 @@ This document describes the Linux kernel Makefiles.
 
 The Makefiles have five parts::
 
-       Makefile                the top Makefile.
-       .config                 the kernel configuration file.
-       arch/$(ARCH)/Makefile   the arch Makefile.
-       scripts/Makefile.*      common rules etc. for all kbuild Makefiles.
-       kbuild Makefiles        there are about 500 of these.
+       Makefile                    the top Makefile.
+       .config                     the kernel configuration file.
+       arch/$(SRCARCH)/Makefile    the arch Makefile.
+       scripts/Makefile.*          common rules etc. for all kbuild Makefiles.
+       kbuild Makefiles            exist in every subdirectory
 
 The top Makefile reads the .config file, which comes from the kernel
 configuration process.
@@ -82,7 +84,7 @@ It builds these goals by recursively descending into the subdirectories of
 the kernel source tree.
 The list of subdirectories which are visited depends upon the kernel
 configuration. The top Makefile textually includes an arch Makefile
-with the name arch/$(ARCH)/Makefile. The arch Makefile supplies
+with the name arch/$(SRCARCH)/Makefile. The arch Makefile supplies
 architecture-specific information to the top Makefile.
 
 Each subdirectory has a kbuild Makefile which carries out the commands
@@ -278,7 +280,7 @@ more details, with real examples.
        actually recognize that there is a lib.a being built, the directory
        shall be listed in libs-y.
 
-       See also "6.4 List directories to visit when descending".
+       See also "7.4 List directories to visit when descending".
 
        Use of lib-y is normally restricted to `lib/` and `arch/*/lib`.
 
@@ -317,11 +319,79 @@ more details, with real examples.
        that directory specifies obj-y, those objects will be left orphan.
        It is very likely a bug of the Makefile or of dependencies in Kconfig.
 
+       Kbuild also supports dedicated syntax, subdir-y and subdir-m, for
+       descending into subdirectories. It is a good fit when you know they
+       do not contain kernel-space objects at all. A typical usage is to let
+       Kbuild descend into subdirectories to build tools.
+
+       Examples::
+
+               # scripts/Makefile
+               subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
+               subdir-$(CONFIG_MODVERSIONS) += genksyms
+               subdir-$(CONFIG_SECURITY_SELINUX) += selinux
+
+       Unlike obj-y/m, subdir-y/m does not need the trailing slash since this
+       syntax is always used for directories.
+
        It is good practice to use a `CONFIG_` variable when assigning directory
        names. This allows kbuild to totally skip the directory if the
        corresponding `CONFIG_` option is neither 'y' nor 'm'.
 
-3.7 Compilation flags
+3.7 Non-builtin vmlinux targets - extra-y
+-----------------------------------------
+
+       extra-y specifies targets which are needed for building vmlinux,
+       but not combined into built-in.a.
+
+       Examples are:
+
+       1) head objects
+
+           Some objects must be placed at the head of vmlinux. They are
+           directly linked to vmlinux without going through built-in.a.
+           A typical use-case is an object that contains the entry point.
+
+           arch/$(SRCARCH)/Makefile should specify such objects as head-y.
+
+           Discussion:
+             Given that we can control the section order in the linker script,
+             why do we need head-y?
+
+       2) vmlinux linker script
+
+           The linker script for vmlinux is located at
+           arch/$(SRCARCH)/kernel/vmlinux.lds
+
+       Example::
+
+               # arch/x86/kernel/Makefile
+               extra-y := head_$(BITS).o
+               extra-y += head$(BITS).o
+               extra-y += ebda.o
+               extra-y += platform-quirks.o
+               extra-y += vmlinux.lds
+
+       $(extra-y) should only contain targets needed for vmlinux.
+
+       Kbuild skips extra-y when vmlinux is apparently not a final goal
+       (e.g. 'make modules', or building external modules).
+
+       If you intend to build targets unconditionally, always-y (explained
+       in the next section) is the correct syntax to use.
+
+3.8 Always built goals - always-y
+---------------------------------
+
+       always-y specifies targets which are literally always built when
+       Kbuild visits the Makefile.
+
+       Example::
+         # ./Kbuild
+         offsets-file := include/generated/asm-offsets.h
+         always-y += $(offsets-file)
+
+3.9 Compilation flags
 ---------------------
 
     ccflags-y, asflags-y and ldflags-y
@@ -410,8 +480,8 @@ more details, with real examples.
                AFLAGS_iwmmxt.o      := -Wa,-mcpu=iwmmxt
 
 
-3.9 Dependency tracking
------------------------
+3.10 Dependency tracking
+------------------------
 
        Kbuild tracks dependencies on the following:
 
@@ -422,21 +492,21 @@ more details, with real examples.
        Thus, if you change an option to $(CC) all affected files will
        be re-compiled.
 
-3.10 Special Rules
-------------------
+3.11 Custom Rules
+-----------------
 
-       Special rules are used when the kbuild infrastructure does
+       Custom rules are used when the kbuild infrastructure does
        not provide the required support. A typical example is
        header files generated during the build process.
        Another example are the architecture-specific Makefiles which
-       need special rules to prepare boot images etc.
+       need custom rules to prepare boot images etc.
 
-       Special rules are written as normal Make rules.
+       Custom rules are written as normal Make rules.
        Kbuild is not executing in the directory where the Makefile is
-       located, so all special rules shall provide a relative
+       located, so all custom rules shall use a relative
        path to prerequisite files and target files.
 
-       Two variables are used when defining special rules:
+       Two variables are used when defining custom rules:
 
        $(src)
            $(src) is a relative path which points to the directory
@@ -454,7 +524,7 @@ more details, with real examples.
                $(obj)/53c8xx_d.h: $(src)/53c7,8xx.scr $(src)/script_asm.pl
                        $(CPP) -DCHIP=810 - < $< | ... $(src)/script_asm.pl
 
-           This is a special rule, following the normal syntax
+           This is a custom rule, following the normal syntax
            required by make.
 
            The target file depends on two prerequisite files. References
@@ -471,13 +541,81 @@ more details, with real examples.
 
        Example::
 
-               #arch/blackfin/boot/Makefile
-               $(obj)/vmImage: $(obj)/vmlinux.gz
-                       $(call if_changed,uimage)
-                       @$(kecho) 'Kernel: $@ is ready'
+               # arch/arm/Makefile
+               $(BOOT_TARGETS): vmlinux
+                       $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@
+                       @$(kecho) '  Kernel: $(boot)/$@ is ready'
+
+       When kbuild executes with KBUILD_VERBOSE=0, only a shorthand of
+       a command is normally displayed.
+       To enable this behaviour for custom commands kbuild requires
+       two variables to be set::
+
+               quiet_cmd_<command>     - what shall be echoed
+                     cmd_<command>     - the command to execute
+
+       Example::
+
+               # lib/Makefile
+               quiet_cmd_crc32 = GEN     $@
+                     cmd_crc32 = $< > $@
+
+               $(obj)/crc32table.h: $(obj)/gen_crc32table
+                       $(call cmd,crc32)
+
+       When updating the $(obj)/crc32table.h target, the line:
+
+                 GEN     lib/crc32table.h
+
+       will be displayed with "make KBUILD_VERBOSE=0".
+
+3.12 Command change detection
+-----------------------------
+
+       When the rule is evaluated, timestamps are compared between the target
+       and its prerequisite files. GNU Make updates the target when any of the
+       prerequisites is newer than that.
+
+       The target should also be rebuilt when the command line has changed
+       since the last invocation. This is not supported by Make itself, so
+       Kbuild achieves it with a kind of meta-programming.
+
+       if_changed is the macro used for this purpose, in the following form::
+
+               quiet_cmd_<command> = ...
+                     cmd_<command> = ...
+
+               <target>: <source(s)> FORCE
+                       $(call if_changed,<command>)
+
+       Any target that utilizes if_changed must be listed in $(targets),
+       otherwise the command line check will fail, and the target will
+       always be built.
+
+       If the target is already listed in the recognized syntax such as
+       obj-y/m, lib-y/m, extra-y/m, always-y/m, hostprogs, userprogs, Kbuild
+       automatically adds it to $(targets). Otherwise, the target must be
+       explicitly added to $(targets).
+
+       Assignments to $(targets) are without $(obj)/ prefix. if_changed may be
+       used in conjunction with custom rules as defined in "3.11 Custom Rules".
+
+       Note: It is a typical mistake to forget the FORCE prerequisite.
+       Another common pitfall is that whitespace is sometimes significant; for
+       instance, the below will fail (note the extra space after the comma)::
+
+               target: source(s) FORCE
+
+       **WRONG!**      $(call if_changed, objcopy)
 
+       Note:
+               if_changed should not be used more than once per target.
+               It stores the executed command in a corresponding .cmd
+               file, and multiple calls would overwrite that record,
+               leading to unwanted results when the target is up to date
+               and only the command-change check triggers re-execution.
 
-3.11 $(CC) support functions
+3.13 $(CC) support functions
 ----------------------------
 
        The kernel may be built with several different versions of
@@ -592,7 +730,7 @@ more details, with real examples.
                        endif
                endif
 
-3.12 $(LD) support functions
+3.14 $(LD) support functions
 ----------------------------
 
     ld-option
@@ -606,7 +744,7 @@ more details, with real examples.
                #Makefile
                LDFLAGS_vmlinux += $(call ld-option, -X)
 
-3.13 Script invocation
+3.15 Script invocation
 ----------------------
 
        Make rules may invoke scripts to build the kernel. The rules shall
@@ -744,7 +882,7 @@ Both possibilities are described in the following.
        as a prerequisite.
        This is possible in two ways:
 
-       (1) List the prerequisite explicitly in a special rule.
+       (1) List the prerequisite explicitly in a custom rule.
 
        Example::
 
@@ -755,11 +893,11 @@ Both possibilities are described in the following.
 
        The target $(obj)/devlist.h will not be built before
        $(obj)/gen-devlist is updated. Note that references to
-       the host programs in special rules must be prefixed with $(obj).
+       the host programs in custom rules must be prefixed with $(obj).
 
        (2) Use always-y
 
-       When there is no suitable special rule, and the host program
+       When there is no suitable custom rule, and the host program
        shall be built when a makefile is entered, the always-y
        variable shall be used.
 
@@ -933,7 +1071,7 @@ When "make clean" is executed, make will descend down in arch/x86/boot,
 and clean as usual. The Makefile located in arch/x86/boot/ may use
 the subdir- trick to descend further down.
 
-Note 1: arch/$(ARCH)/Makefile cannot use "subdir-", because that file is
+Note 1: arch/$(SRCARCH)/Makefile cannot use "subdir-", because that file is
 included in the top level makefile, and the kbuild infrastructure
 is not operational at that point.
 
@@ -946,9 +1084,9 @@ be visited during "make clean".
 The top level Makefile sets up the environment and does the preparation,
 before starting to descend down in the individual directories.
 The top level makefile contains the generic part, whereas
-arch/$(ARCH)/Makefile contains what is required to set up kbuild
+arch/$(SRCARCH)/Makefile contains what is required to set up kbuild
 for said architecture.
-To do so, arch/$(ARCH)/Makefile sets up a number of variables and defines
+To do so, arch/$(SRCARCH)/Makefile sets up a number of variables and defines
 a few targets.
 
 When kbuild executes, the following steps are followed (roughly):
@@ -956,14 +1094,14 @@ When kbuild executes, the following steps are followed (roughly):
 1) Configuration of the kernel => produce .config
 2) Store kernel version in include/linux/version.h
 3) Updating all other prerequisites to the target prepare:
-   - Additional prerequisites are specified in arch/$(ARCH)/Makefile
+   - Additional prerequisites are specified in arch/$(SRCARCH)/Makefile
 4) Recursively descend down in all directories listed in
    init-* core* drivers-* net-* libs-* and build all targets.
-   - The values of the above variables are expanded in arch/$(ARCH)/Makefile.
+   - The values of the above variables are expanded in arch/$(SRCARCH)/Makefile.
 5) All object files are then linked and the resulting file vmlinux is
    located at the root of the obj tree.
    The very first objects linked are listed in head-y, assigned by
-   arch/$(ARCH)/Makefile.
+   arch/$(SRCARCH)/Makefile.
 6) Finally, the architecture-specific part does any required post processing
    and builds the final bootimage.
    - This includes building boot records
@@ -1154,7 +1292,7 @@ When kbuild executes, the following steps are followed (roughly):
        machinery is all architecture-independent.
 
 
-       head-y, init-y, core-y, libs-y, drivers-y, net-y
+       head-y, core-y, libs-y, drivers-y
            $(head-y) lists objects to be linked first in vmlinux.
 
            $(libs-y) lists directories where a lib.a archive can be located.
@@ -1162,23 +1300,24 @@ When kbuild executes, the following steps are followed (roughly):
            The rest list directories where a built-in.a object file can be
            located.
 
-           $(init-y) objects will be located after $(head-y).
-
            Then the rest follows in this order:
 
-               $(core-y), $(libs-y), $(drivers-y) and $(net-y).
+               $(core-y), $(libs-y), $(drivers-y)
 
            The top level Makefile defines values for all generic directories,
-           and arch/$(ARCH)/Makefile only adds architecture-specific
+           and arch/$(SRCARCH)/Makefile only adds architecture-specific
            directories.
 
            Example::
 
-               #arch/sparc64/Makefile
-               core-y += arch/sparc64/kernel/
-               libs-y += arch/sparc64/prom/ arch/sparc64/lib/
-               drivers-$(CONFIG_OPROFILE)  += arch/sparc64/oprofile/
+               # arch/sparc/Makefile
+               core-y                 += arch/sparc/
+
+               libs-y                 += arch/sparc/prom/
+               libs-y                 += arch/sparc/lib/
 
+               drivers-$(CONFIG_PM) += arch/sparc/power/
+               drivers-$(CONFIG_OPROFILE)      += arch/sparc/oprofile/
 
 7.5 Architecture-specific boot images
 -------------------------------------
@@ -1189,15 +1328,15 @@ When kbuild executes, the following steps are followed (roughly):
        The actual goals are not standardized across architectures.
 
        It is common to locate any additional processing in a boot/
-       directory below arch/$(ARCH)/.
+       directory below arch/$(SRCARCH)/.
 
        Kbuild does not provide any smart way to support building a
-       target specified in boot/. Therefore arch/$(ARCH)/Makefile shall
+       target specified in boot/. Therefore arch/$(SRCARCH)/Makefile shall
        call make manually to build a target in boot/.
 
        The recommended approach is to include shortcuts in
-       arch/$(ARCH)/Makefile, and use the full path when calling down
-       into the arch/$(ARCH)/boot/Makefile.
+       arch/$(SRCARCH)/Makefile, and use the full path when calling down
+       into the arch/$(SRCARCH)/boot/Makefile.
 
        Example::
 
@@ -1217,7 +1356,7 @@ When kbuild executes, the following steps are followed (roughly):
 
                #arch/x86/Makefile
                define archhelp
-                 echo  '* bzImage      - Image (arch/$(ARCH)/boot/bzImage)'
+                 echo  '* bzImage      - Compressed kernel image (arch/x86/boot/bzImage)'
                endef
 
        When make is executed without arguments, the first goal encountered
@@ -1235,71 +1374,12 @@ When kbuild executes, the following steps are followed (roughly):
 
        When "make" is executed without arguments, bzImage will be built.
 
-7.6 Building non-kbuild targets
--------------------------------
-
-    extra-y
-       extra-y specifies additional targets created in the current
-       directory, in addition to any targets specified by `obj-*`.
-
-       Listing all targets in extra-y is required for two purposes:
-
-       1) Enable kbuild to check changes in command lines
-
-          - When $(call if_changed,xxx) is used
-
-       2) kbuild knows what files to delete during "make clean"
-
-       Example::
-
-               #arch/x86/kernel/Makefile
-               extra-y := head.o init_task.o
-
-       In this example, extra-y is used to list object files that
-       shall be built, but shall not be linked as part of built-in.a.
-
 7.7 Commands useful for building a boot image
 ---------------------------------------------
 
     Kbuild provides a few macros that are useful when building a
     boot image.
 
-    if_changed
-       if_changed is the infrastructure used for the following commands.
-
-       Usage::
-
-               target: source(s) FORCE
-                       $(call if_changed,ld/objcopy/gzip/...)
-
-       When the rule is evaluated, it is checked to see if any files
-       need an update, or the command line has changed since the last
-       invocation. The latter will force a rebuild if any options
-       to the executable have changed.
-       Any target that utilises if_changed must be listed in $(targets),
-       otherwise the command line check will fail, and the target will
-       always be built.
-       Assignments to $(targets) are without $(obj)/ prefix.
-       if_changed may be used in conjunction with custom commands as
-       defined in 7.8 "Custom kbuild commands".
-
-       Note: It is a typical mistake to forget the FORCE prerequisite.
-       Another common pitfall is that whitespace is sometimes
-       significant; for instance, the below will fail (note the extra space
-       after the comma)::
-
-               target: source(s) FORCE
-
-       **WRONG!**      $(call if_changed, ld/objcopy/gzip/...)
-
-        Note:
-             if_changed should not be used more than once per target.
-              It stores the executed command in a corresponding .cmd
-
-        file and multiple calls would result in overwrites and
-        unwanted results when the target is up to date and only the
-        tests on changed commands trigger execution of commands.
-
     ld
        Link target. Often, LDFLAGS_$@ is used to set specific options to ld.
 
@@ -1332,7 +1412,7 @@ When kbuild executes, the following steps are followed (roughly):
 
     objcopy
        Copy binary. Uses OBJCOPYFLAGS usually specified in
-       arch/$(ARCH)/Makefile.
+       arch/$(SRCARCH)/Makefile.
        OBJCOPYFLAGS_$@ may be used to set additional options.
 
     gzip
@@ -1361,41 +1441,11 @@ When kbuild executes, the following steps are followed (roughly):
                targets += $(dtb-y)
                DTC_FLAGS ?= -p 1024
 
-7.8 Custom kbuild commands
---------------------------
-
-       When kbuild is executing with KBUILD_VERBOSE=0, then only a shorthand
-       of a command is normally displayed.
-       To enable this behaviour for custom commands kbuild requires
-       two variables to be set::
-
-               quiet_cmd_<command>     - what shall be echoed
-                     cmd_<command>     - the command to execute
-
-       Example::
-
-               #
-               quiet_cmd_image = BUILD   $@
-                     cmd_image = $(obj)/tools/build $(BUILDFLAGS) \
-                                                    $(obj)/vmlinux.bin > $@
-
-               targets += bzImage
-               $(obj)/bzImage: $(obj)/vmlinux.bin $(obj)/tools/build FORCE
-                       $(call if_changed,image)
-                       @echo 'Kernel: $@ is ready'
-
-       When updating the $(obj)/bzImage target, the line:
-
-               BUILD    arch/x86/boot/bzImage
-
-       will be displayed with "make KBUILD_VERBOSE=0".
-
-
 7.9 Preprocessing linker scripts
 --------------------------------
 
        When the vmlinux image is built, the linker script
-       arch/$(ARCH)/kernel/vmlinux.lds is used.
+       arch/$(SRCARCH)/kernel/vmlinux.lds is used.
        The script is a preprocessed variant of the file vmlinux.lds.S
        located in the same directory.
        kbuild knows .lds files and includes a rule `*lds.S` -> `*lds`.
@@ -1405,9 +1455,6 @@ When kbuild executes, the following steps are followed (roughly):
                #arch/x86/kernel/Makefile
                extra-y := vmlinux.lds
 
-               #Makefile
-               export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
-
        The assignment to extra-y is used to tell kbuild to build the
        target vmlinux.lds.
        The assignment to $(CPPFLAGS_vmlinux.lds) tells kbuild to use the
@@ -1481,7 +1528,7 @@ See subsequent chapter for the syntax of the Kbuild file.
 
        If an architecture uses a verbatim copy of a header from
        include/asm-generic then this is listed in the file
-       arch/$(ARCH)/include/asm/Kbuild like this:
+       arch/$(SRCARCH)/include/asm/Kbuild like this:
 
                Example::
 
@@ -1492,7 +1539,7 @@ See subsequent chapter for the syntax of the Kbuild file.
        During the prepare phase of the build a wrapper include
        file is generated in the directory::
 
-               arch/$(ARCH)/include/generated/asm
+               arch/$(SRCARCH)/include/generated/asm
 
        When a header is exported where the architecture uses
        the generic header a similar wrapper is generated as part
@@ -1527,8 +1574,8 @@ See subsequent chapter for the syntax of the Kbuild file.
        to define the minimum set of ASM headers that all architectures must have.
 
        This works like optional generic-y. If a mandatory header is missing
-       in arch/$(ARCH)/include/(uapi/)/asm, Kbuild will automatically generate
-       a wrapper of the asm-generic one.
+       in arch/$(SRCARCH)/include/(uapi/)/asm, Kbuild will automatically
+       generate a wrapper of the asm-generic one.
 
 9 Kbuild Variables
 ==================
@@ -1564,6 +1611,16 @@ The top Makefile exports the following variables:
 
            make ARCH=m68k ...
 
+    SRCARCH
+       This variable specifies the directory in arch/ to build.
+
+       ARCH and SRCARCH may not necessarily match. A couple of arch
+       directories are biarch, that is, a single `arch/*/` directory supports
+       both 32-bit and 64-bit.
+
+       For example, you can pass in ARCH=i386, ARCH=x86_64, or ARCH=x86.
+       For all of them, SRCARCH=x86 because arch/x86/ supports both i386 and
+       x86_64.
 
     INSTALL_PATH
        This variable defines a place for the arch Makefiles to install
index 85ccc87..a1f3eb7 100644 (file)
@@ -332,7 +332,7 @@ according to the following rule:
              There are two notable exceptions to this rule: larger
              subsystems have their own directory under include/, such as
              include/scsi; and architecture specific headers are located
-             under arch/$(ARCH)/include/.
+             under arch/$(SRCARCH)/include/.
 
 4.1 Kernel Includes
 -------------------
index 1910079..b063f2f 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use strict;
 use Text::Tabs;
 use Getopt::Long;
index 1548d84..54492aa 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # The TCM v4 multi-protocol fabric module generation script for drivers/target/$NEW_MOD
 #
 # Copyright (c) 2010 Rising Tide Systems
index 0ab40e0..aa9cc7a 100644 (file)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # add symbolic names to read_msr / write_msr in trace
 # decode_msr msr-index.h < trace
 import sys
index 0a120aa..b9b7d80 100644 (file)
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # This is a POC (proof of concept or piece of crap, take your pick) for reading the
 # text representation of trace output related to page allocation. It makes an attempt
 # to extract some high-level information on what is going on. The accuracy of the parser
index 995da15..2f4e398 100644 (file)
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # This is a POC for reading the text representation of trace output related to
 # page reclaim. It makes an attempt to extract some high-level information on
 # what is going on. The accuracy of the parser may vary
index e00a66d..70254ea 100644 (file)
@@ -262,6 +262,18 @@ The KVM_RUN ioctl (cf.) communicates with userspace via a shared
 memory region.  This ioctl returns the size of that region.  See the
 KVM_RUN documentation for details.
 
+Besides the size of the KVM_RUN communication region, other areas of
+the VCPU file descriptor can be mmap-ed, including:
+
+- if KVM_CAP_COALESCED_MMIO is available, a page at
+  KVM_COALESCED_MMIO_PAGE_OFFSET * PAGE_SIZE; for historical reasons,
+  this page is included in the result of KVM_GET_VCPU_MMAP_SIZE.
+  KVM_CAP_COALESCED_MMIO is not documented yet.
+
+- if KVM_CAP_DIRTY_LOG_RING is available, a number of pages at
+  KVM_DIRTY_LOG_PAGE_OFFSET * PAGE_SIZE.  For more information on
+  KVM_CAP_DIRTY_LOG_RING, see section 8.3.
+
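As a hedged userspace sketch (``kvm_fd`` and ``vcpu_fd`` are assumed to be
already-open descriptors; error handling is omitted)::

  int size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
  struct kvm_run *run = mmap(NULL, size, PROT_READ | PROT_WRITE,
                             MAP_SHARED, vcpu_fd, 0);

  /* With KVM_CAP_COALESCED_MMIO, the coalesced MMIO ring lives inside
     this same mapping, a fixed number of pages in: */
  struct kvm_coalesced_mmio_ring *ring = (void *)((char *)run +
          KVM_COALESCED_MMIO_PAGE_OFFSET * getpagesize());
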
 
 4.6 KVM_SET_MEMORY_REGION
 -------------------------
@@ -4455,9 +4467,9 @@ that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present.
 4.118 KVM_GET_SUPPORTED_HV_CPUID
 --------------------------------
 
-:Capability: KVM_CAP_HYPERV_CPUID
+:Capability: KVM_CAP_HYPERV_CPUID (vcpu), KVM_CAP_SYS_HYPERV_CPUID (system)
 :Architectures: x86
-:Type: vcpu ioctl
+:Type: system ioctl, vcpu ioctl
 :Parameters: struct kvm_cpuid2 (in/out)
 :Returns: 0 on success, -1 on error
 
@@ -4502,9 +4514,6 @@ Currently, the following list of CPUID leaves are returned:
  - HYPERV_CPUID_SYNDBG_INTERFACE
  - HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
 
-HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
-enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
-
 Userspace invokes KVM_GET_SUPPORTED_HV_CPUID by passing a kvm_cpuid2 structure
 with the 'nent' field indicating the number of entries in the variable-size
 array 'entries'.  If the number of entries is too low to describe all Hyper-V
@@ -4515,6 +4524,15 @@ number of valid entries in the 'entries' array, which is then filled.
 'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved,
 userspace should not expect to get any particular value there.
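
For illustration, a hedged sketch of this call that grows the buffer on
E2BIG (``vcpu_fd`` is an assumption)::

  struct kvm_cpuid2 *cpuid;
  int nent = 8;

  for (;;) {
          cpuid = calloc(1, sizeof(*cpuid) +
                            nent * sizeof(struct kvm_cpuid_entry2));
          cpuid->nent = nent;
          if (ioctl(vcpu_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid) == 0)
                  break;          /* cpuid->nent holds the valid count */
          if (errno != E2BIG)     /* any other error is fatal here */
                  err(1, "KVM_GET_SUPPORTED_HV_CPUID");
          nent *= 2;              /* buffer too small, retry larger */
          free(cpuid);
  }
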
 
+Note, the vcpu version of KVM_GET_SUPPORTED_HV_CPUID is currently deprecated.
+Unlike the system ioctl, which exposes all supported feature bits
+unconditionally, the vcpu version has the following quirks:
+
+- HYPERV_CPUID_NESTED_FEATURES leaf and HV_X64_ENLIGHTENED_VMCS_RECOMMENDED
+  feature bit are only exposed when Enlightened VMCS was previously enabled
+  on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
+
+- HV_STIMER_DIRECT_MODE_AVAILABLE bit is only exposed with in-kernel LAPIC
+  (which presumes KVM_CREATE_IRQCHIP has already been called).
+
 4.119 KVM_ARM_VCPU_FINALIZE
 ---------------------------
 
@@ -6390,3 +6408,91 @@ When enabled, KVM will disable paravirtual features provided to the
 guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf
 (0x40000001). Otherwise, a guest may use the paravirtual features
 regardless of what has actually been exposed through the CPUID leaf.
+
+
+8.29 KVM_CAP_DIRTY_LOG_RING
+---------------------------
+
+:Architectures: x86
+:Parameters: args[0] - size of the dirty log ring
+
+KVM is capable of tracking dirty memory using ring buffers that are
+mmaped into userspace; there is one dirty ring per vcpu.
+
+The dirty ring is available to userspace as an array of
+``struct kvm_dirty_gfn``.  Each dirty entry is defined as::
+
+  struct kvm_dirty_gfn {
+          __u32 flags;
+          __u32 slot; /* as_id | slot_id */
+          __u64 offset;
+  };
+
+The following values are defined for the flags field to define the
+current state of the entry::
+
+  #define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
+  #define KVM_DIRTY_GFN_F_RESET           BIT(1)
+  #define KVM_DIRTY_GFN_F_MASK            0x3
+
+Userspace should call KVM_ENABLE_CAP ioctl right after KVM_CREATE_VM
+ioctl to enable this capability for the new guest and set the size of
+the rings.  Enabling the capability is only allowed before creating any
+vCPU, and the size of the ring must be a power of two.  The larger the
+ring buffer, the less likely the ring is full and the VM is forced to
+exit to userspace. The optimal size depends on the workload, but it is
+recommended that it be at least 64 KiB (4096 entries).
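
A hedged sketch of enabling the capability (``vm_fd`` is an assumption; the
64 KiB size merely follows the recommendation above)::

  struct kvm_enable_cap cap = {
          .cap = KVM_CAP_DIRTY_LOG_RING,
          .args[0] = 65536,       /* ring size; must be a power of two */
  };

  if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
          err(1, "KVM_ENABLE_CAP(KVM_CAP_DIRTY_LOG_RING)");
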
+
+Just like for dirty page bitmaps, the buffer tracks writes to
+all user memory regions for which the KVM_MEM_LOG_DIRTY_PAGES flag was
+set in KVM_SET_USER_MEMORY_REGION.  Once a memory region is registered
+with the flag set, userspace can start harvesting dirty pages from the
+ring buffer.
+
+An entry in the ring buffer can be unused (flag bits ``00``),
+dirty (flag bits ``01``) or harvested (flag bits ``1X``).  The
+state machine for the entry is as follows::
+
+          dirtied         harvested        reset
+     00 -----------> 01 -------------> 1X -------+
+      ^                                          |
+      |                                          |
+      +------------------------------------------+
+
+To harvest the dirty pages, userspace accesses the mmaped ring buffer
+to read the dirty GFNs.  If the flags field has the DIRTY bit set (at this
+stage the RESET bit must be cleared), then it means this GFN is a dirty GFN.
+The userspace should harvest this GFN and mark the flags from state
+``01b`` to ``1Xb`` (bit 0 will be ignored by KVM, but bit 1 must be set
+to show that this GFN is harvested and waiting for a reset), and move
+on to the next GFN.  The userspace should continue to do this until the
+flags of a GFN have the DIRTY bit cleared, meaning that it has harvested
+all the dirty GFNs that were available.
+
+It's not necessary for userspace to harvest all the dirty GFNs at once.
+However it must collect the dirty GFNs in sequence, i.e., the userspace
+program cannot skip one dirty GFN to collect the one next to it.
+
+After processing one or more entries in the ring buffer, userspace
+calls the VM ioctl KVM_RESET_DIRTY_RINGS to notify the kernel about
+it, so that the kernel will reprotect those collected GFNs.
+Therefore, the ioctl must be called *before* reading the content of
+the dirty pages.
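
Putting the two steps together, a simplified sketch (memory barriers and
wrap-around bookkeeping are omitted; ``ring``, ``nentries``, ``next`` and
``collect()`` are assumptions)::

  struct kvm_dirty_gfn *gfn;

  for (;;) {
          gfn = &ring[next % nentries];
          if (!(gfn->flags & KVM_DIRTY_GFN_F_DIRTY))
                  break;                          /* nothing left to harvest */
          collect(gfn->slot, gfn->offset);        /* record the dirty page */
          gfn->flags |= KVM_DIRTY_GFN_F_RESET;    /* 01b -> 1Xb: harvested */
          next++;
  }

  ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0);         /* let KVM reprotect them */
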
+
+The dirty ring can get full.  When that happens, the KVM_RUN of the
+vcpu will return with exit reason KVM_EXIT_DIRTY_LOG_FULL.
+
+The dirty ring interface has a major difference compared to the
+KVM_GET_DIRTY_LOG interface in that, when reading the dirty ring from
+userspace, it's still possible that the kernel has not yet flushed the
+processor's dirty page buffers into the kernel buffer (with dirty bitmaps, the
+flushing is done by the KVM_GET_DIRTY_LOG ioctl).  To achieve that, one
+needs to kick the vcpu out of KVM_RUN using a signal.  The resulting
+vmexit ensures that all dirty GFNs are flushed to the dirty rings.
+
+NOTE: the capability KVM_CAP_DIRTY_LOG_RING and the corresponding
+ioctl KVM_RESET_DIRTY_RINGS are mutually exclusive with the existing ioctls
+KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG.  After enabling
+KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual
+machine will switch to ring-buffer dirty page tracking and further
+KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail.
index 687b60d..392521a 100644 (file)
@@ -19,8 +19,8 @@ Two new SMCCC compatible hypercalls are defined:
 
 These are only available in the SMC64/HVC64 calling convention as
 paravirtualized time is not available to 32 bit Arm guests. The existence of
-the PV_FEATURES hypercall should be probed using the SMCCC 1.1 ARCH_FEATURES
-mechanism before calling it.
+the PV_TIME_FEATURES hypercall should be probed using the SMCCC 1.1
+ARCH_FEATURES mechanism before calling it.
 
 PV_TIME_FEATURES
     ============= ========    ==========
index 28d7acd..ad0e34b 100644 (file)
@@ -1183,7 +1183,7 @@ F:        Documentation/devicetree/bindings/interrupt-controller/google,goldfish-pic.tx
 F:     drivers/irqchip/irq-goldfish-pic.c
 
 ANDROID GOLDFISH RTC DRIVER
-M:     Miodrag Dinic <miodrag.dinic@mips.com>
+M:     Jiaxun Yang <jiaxun.yang@flygoat.com>
 S:     Supported
 F:     Documentation/devicetree/bindings/rtc/google,goldfish-rtc.txt
 F:     drivers/rtc/rtc-goldfish.c
@@ -2146,8 +2146,10 @@ L:       linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 W:     http://linux-chenxing.org/
 F:     Documentation/devicetree/bindings/arm/mstar/*
+F:     Documentation/devicetree/bindings/gpio/mstar,msc313-gpio.yaml
 F:     arch/arm/boot/dts/mstar-*
 F:     arch/arm/mach-mstar/
+F:     drivers/gpio/gpio-msc313.c
 F:     include/dt-bindings/gpio/msc313-gpio.h
 
 ARM/NEC MOBILEPRO 900/c MACHINE SUPPORT
@@ -5295,6 +5297,12 @@ F:       include/linux/dma-mapping.h
 F:     include/linux/dma-map-ops.h
 F:     kernel/dma/
 
+DMA MAPPING BENCHMARK
+M:     Barry Song <song.bao.hua@hisilicon.com>
+L:     iommu@lists.linux-foundation.org
+F:     kernel/dma/map_benchmark.c
+F:     tools/testing/selftests/dma/
+
 DMA-BUF HEAPS FRAMEWORK
 M:     Sumit Semwal <sumit.semwal@linaro.org>
 R:     Benjamin Gaignard <benjamin.gaignard@linaro.org>
@@ -7500,7 +7508,7 @@ M:        Bob Peterson <rpeterso@redhat.com>
 M:     Andreas Gruenbacher <agruenba@redhat.com>
 L:     cluster-devel@redhat.com
 S:     Supported
-W:     http://sources.redhat.com/cluster/
+B:     https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=gfs2
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git
 F:     Documentation/filesystems/gfs2*
 F:     fs/gfs2/
@@ -7548,6 +7556,7 @@ M:        Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 L:     linux-gpio@vger.kernel.org
 L:     linux-acpi@vger.kernel.org
 S:     Maintained
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git
 F:     Documentation/firmware-guide/acpi/gpio-properties.rst
 F:     drivers/gpio/gpiolib-acpi.c
 F:     drivers/gpio/gpiolib-acpi.h
@@ -7981,6 +7990,12 @@ L:       dmaengine@vger.kernel.org
 S:     Maintained
 F:     drivers/dma/hisi_dma.c
 
+HISILICON GPIO DRIVER
+M:     Luo Jiaxing <luojiaxing@huawei.com>
+L:     linux-gpio@vger.kernel.org
+S:     Maintained
+F:     drivers/gpio/gpio-hisi.c
+
 HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE)
 M:     Zaibo Xu <xuzaibo@huawei.com>
 L:     linux-crypto@vger.kernel.org
@@ -8407,7 +8422,7 @@ F:        Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.txt
 F:     drivers/i3c/master/dw*
 
 I3C SUBSYSTEM
-M:     Boris Brezillon <bbrezillon@kernel.org>
+M:     Alexandre Belloni <alexandre.belloni@bootlin.com>
 L:     linux-i3c@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 C:     irc://chat.freenode.net/linux-i3c
@@ -15099,7 +15114,7 @@ RENESAS CLOCK DRIVERS
 M:     Geert Uytterhoeven <geert+renesas@glider.be>
 L:     linux-renesas-soc@vger.kernel.org
 S:     Supported
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git clk-renesas
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git renesas-clk
 F:     Documentation/devicetree/bindings/clock/renesas,*
 F:     drivers/clk/renesas/
 
@@ -19500,6 +19515,16 @@ S:     Maintained
 F:     Documentation/devicetree/bindings/net/can/xilinx_can.txt
 F:     drivers/net/can/xilinx_can.c
 
+XILINX GPIO DRIVER
+M:     Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
+R:     Srinivas Neeli <srinivas.neeli@xilinx.com>
+R:     Michal Simek <michal.simek@xilinx.com>
+S:     Maintained
+F:     Documentation/devicetree/bindings/gpio/gpio-xilinx.txt
+F:     Documentation/devicetree/bindings/gpio/gpio-zynq.txt
+F:     drivers/gpio/gpio-xilinx.c
+F:     drivers/gpio/gpio-zynq.c
+
 XILINX SD-FEC IP CORES
 M:     Derek Kiernan <derek.kiernan@xilinx.com>
 M:     Dragan Cvetic <dragan.cvetic@xilinx.com>
index e30cf02..29c3eec 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -450,7 +450,7 @@ LEX         = flex
 YACC           = bison
 AWK            = awk
 INSTALLKERNEL  := installkernel
-DEPMOD         = /sbin/depmod
+DEPMOD         = depmod
 PERL           = perl
 PYTHON         = python
 PYTHON3                = python3
index a08999d..78c6f05 100644 (file)
@@ -976,16 +976,16 @@ config VMAP_STACK
        default y
        bool "Use a virtually-mapped stack"
        depends on HAVE_ARCH_VMAP_STACK
-       depends on !KASAN || KASAN_VMALLOC
+       depends on !KASAN || KASAN_HW_TAGS || KASAN_VMALLOC
        help
         Enable this if you want to use virtually-mapped kernel stacks
          with guard pages.  This causes kernel stack overflows to be
          caught immediately rather than causing difficult-to-diagnose
          corruption.
 
-         To use this with KASAN, the architecture must support backing
-         virtual mappings with real shadow memory, and KASAN_VMALLOC must
-         be enabled.
+         To use this with software KASAN modes, the architecture must support
+         backing virtual mappings with real shadow memory, and KASAN_VMALLOC
+         must be enabled.
 
 config ARCH_OPTIONAL_KERNEL_RWX
        def_bool n
index 1b1d596..ce30779 100644 (file)
@@ -216,6 +216,6 @@ alpha_rtc_init(void)
                rtc->ops = &remote_rtc_ops;
 #endif
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 device_initcall(alpha_rtc_init);
index ee7b01b..a661706 100644 (file)
 548    common  pidfd_getfd                     sys_pidfd_getfd
 549    common  faccessat2                      sys_faccessat2
 550    common  process_madvise                 sys_process_madvise
+551    common  epoll_pwait2                    sys_epoll_pwait2
index ba937d8..1382489 100644 (file)
@@ -5,7 +5,6 @@ config ARM
        select ARCH_32BIT_OFF_T
        select ARCH_HAS_BINFMT_FLAT
        select ARCH_HAS_DEBUG_VIRTUAL if MMU
-       select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_FORTIFY_SOURCE
@@ -57,6 +56,7 @@ config ARM
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
        select GENERIC_IRQ_SHOW_LEVEL
+       select GENERIC_LIB_DEVMEM_IS_ALLOWED
        select GENERIC_PCI_IOMAP
        select GENERIC_SCHED_CLOCK
        select GENERIC_SMP_IDLE_THREAD
@@ -68,6 +68,7 @@ config ARM
        select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
        select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
        select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
+       select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
        select HAVE_ARCH_MMAP_RND_BITS if MMU
        select HAVE_ARCH_PFN_VALID
        select HAVE_ARCH_SECCOMP
@@ -245,7 +246,7 @@ config ARM_PATCH_PHYS_VIRT
          kernel in system memory.
 
          This can only be used with non-XIP MMU kernels where the base
-         of physical memory is at a 16MB boundary.
+         of physical memory is at a 2 MiB boundary.
 
          Only disable this option if you know that you do not require
          this feature (eg, building a kernel for a single machine) and
@@ -1298,6 +1299,15 @@ config PAGE_OFFSET
        default 0xB0000000 if VMSPLIT_3G_OPT
        default 0xC0000000
 
+config KASAN_SHADOW_OFFSET
+       hex
+       depends on KASAN
+       default 0x1f000000 if PAGE_OFFSET=0x40000000
+       default 0x5f000000 if PAGE_OFFSET=0x80000000
+       default 0x9f000000 if PAGE_OFFSET=0xC0000000
+       default 0x8f000000 if PAGE_OFFSET=0xB0000000
+       default 0xffffffff
+
 config NR_CPUS
        int "Maximum number of CPUs (2-32)"
        range 2 32
index 4a066c6..4aaec95 100644 (file)
@@ -10,7 +10,7 @@
 #
 # Copyright (C) 1995-2001 by Russell King
 
-LDFLAGS_vmlinux        := --no-undefined -X --pic-veneer
+LDFLAGS_vmlinux        := --no-undefined -X --pic-veneer -z norelro
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux        += --be8
 KBUILD_LDFLAGS_MODULE  += --be8
index e156741..fb521ef 100644 (file)
@@ -24,6 +24,7 @@ OBJS          += hyp-stub.o
 endif
 
 GCOV_PROFILE           := n
+KASAN_SANITIZE         := n
 
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
 KCOV_INSTRUMENT                := n
index caa2732..d9cce72 100644 (file)
 
 #include "efi-header.S"
 
+#ifdef __ARMEB__
+#define OF_DT_MAGIC 0xd00dfeed
+#else
+#define OF_DT_MAGIC 0xedfe0dd0
+#endif
+
  AR_CLASS(     .arch   armv7-a )
  M_CLASS(      .arch   armv7-m )
 
                /*
                 * Debug print of the final appended DTB location
                 */
-               .macro dbgadtb, begin, end
+               .macro dbgadtb, begin, size
 #ifdef DEBUG
                kputc   #'D'
                kputc   #'T'
                kputc   #'('
                kputc   #'0'
                kputc   #'x'
-               kphex   \end, 8         /* End of appended DTB */
+               kphex   \size, 8        /* Size of appended DTB */
                kputc   #')'
                kputc   #'\n'
 #endif
                orr     \res, \res, \tmp1, lsl #24
                .endm
 
+               .macro  be32tocpu, val, tmp
+#ifndef __ARMEB__
+               /* convert to little endian */
+               eor     \tmp, \val, \val, ror #16
+               bic     \tmp, \tmp, #0x00ff0000
+               mov     \val, \val, ror #8
+               eor     \val, \val, \tmp, lsr #8
+#endif
+               .endm
+
                .section ".start", "ax"
 /*
  * sort out different calling conventions
@@ -325,11 +341,7 @@ restart:   adr     r0, LC1
  */
 
                ldr     lr, [r6, #0]
-#ifndef __ARMEB__
-               ldr     r1, =0xedfe0dd0         @ sig is 0xd00dfeed big endian
-#else
-               ldr     r1, =0xd00dfeed
-#endif
+               ldr     r1, =OF_DT_MAGIC
                cmp     lr, r1
                bne     dtb_check_done          @ not found
 
@@ -345,13 +357,7 @@ restart:   adr     r0, LC1
 
                /* Get the initial DTB size */
                ldr     r5, [r6, #4]
-#ifndef __ARMEB__
-               /* convert to little endian */
-               eor     r1, r5, r5, ror #16
-               bic     r1, r1, #0x00ff0000
-               mov     r5, r5, ror #8
-               eor     r5, r5, r1, lsr #8
-#endif
+               be32tocpu r5, r1
                dbgadtb r6, r5
                /* 50% DTB growth should be good enough */
                add     r5, r5, r5, lsr #1
@@ -403,13 +409,7 @@ restart:   adr     r0, LC1
 
                /* Get the current DTB size */
                ldr     r5, [r6, #4]
-#ifndef __ARMEB__
-               /* convert r5 (dtb size) to little endian */
-               eor     r1, r5, r5, ror #16
-               bic     r1, r1, #0x00ff0000
-               mov     r5, r5, ror #8
-               eor     r5, r5, r1, lsr #8
-#endif
+               be32tocpu r5, r1
 
                /* preserve 64-bit alignment */
                add     r5, r5, #7
@@ -468,15 +468,10 @@ dtb_check_done:
 
                /*
                 * Compute the address of the hyp vectors after relocation.
-                * This requires some arithmetic since we cannot directly
-                * reference __hyp_stub_vectors in a PC-relative way.
                 * Call __hyp_set_vectors with the new address so that we
                 * can HVC again after the copy.
                 */
-0:             adr     r0, 0b
-               movw    r1, #:lower16:__hyp_stub_vectors - 0b
-               movt    r1, #:upper16:__hyp_stub_vectors - 0b
-               add     r0, r0, r1
+               adr_l   r0, __hyp_stub_vectors
                sub     r0, r0, r5
                add     r0, r0, r10
                bl      __hyp_set_vectors
@@ -627,17 +622,11 @@ not_relocated:    mov     r0, #0
                cmp     r0, #HYP_MODE           @ if not booted in HYP mode...
                bne     __enter_kernel          @ boot kernel directly
 
-               adr     r12, .L__hyp_reentry_vectors_offset
-               ldr     r0, [r12]
-               add     r0, r0, r12
-
+               adr_l   r0, __hyp_reentry_vectors
                bl      __hyp_set_vectors
                __HVC(0)                        @ otherwise bounce to hyp mode
 
                b       .                       @ should never be reached
-
-               .align  2
-.L__hyp_reentry_vectors_offset:        .long   __hyp_reentry_vectors - .
 #else
                b       __enter_kernel
 #endif
@@ -1440,8 +1429,7 @@ ENTRY(efi_enter_kernel)
                mov     r4, r0                  @ preserve image base
                mov     r8, r1                  @ preserve DT pointer
 
- ARM(          adrl    r0, call_cache_fn       )
- THUMB(                adr     r0, call_cache_fn       )
+               adr_l   r0, call_cache_fn
                adr     r1, 0f                  @ clean the region of code we
                bl      cache_clean_flush       @ may run with the MMU off
 
index ade5079..8c0fa27 100644 (file)
@@ -7,6 +7,25 @@
 
 #include <linux/string.h>
 
+/*
+ * The decompressor is built without KASan but uses the same redirects as the
+ * rest of the kernel when CONFIG_KASAN is enabled, defining e.g. memcpy()
+ * to __memcpy() but since we are not linking with the main kernel string
+ * library in the decompressor, that will lead to link failures.
+ *
+ * Undefine KASan's versions, define the wrapped functions and alias them to
+ * the right names so that when e.g. __memcpy() appears in the code, it will
+ * still be linked to this local version of memcpy().
+ */
+#ifdef CONFIG_KASAN
+#undef memcpy
+#undef memmove
+#undef memset
+void *__memcpy(void *__dest, __const void *__src, size_t __n) __alias(memcpy);
+void *__memmove(void *__dest, __const void *__src, size_t count) __alias(memmove);
+void *__memset(void *s, int c, size_t count) __alias(memset);
+#endif
+
 void *memcpy(void *__dest, __const void *__src, size_t __n)
 {
        int i = 0;
index feac2c8..6ed3042 100644 (file)
  */
 #define ALT_UP(instr...)                                       \
        .pushsection ".alt.smp.init", "a"                       ;\
-       .long   9998b                                           ;\
+       .long   9998b - .                                       ;\
 9997:  instr                                                   ;\
        .if . - 9997b == 2                                      ;\
                nop                                             ;\
        .popsection
 #define ALT_UP_B(label)                                        \
        .pushsection ".alt.smp.init", "a"                       ;\
-       .long   9998b                                           ;\
+       .long   9998b - .                                       ;\
        W(b)    . + (label - 9998b)                                     ;\
        .popsection
 #else
@@ -494,4 +494,88 @@ THUMB(     orr     \reg , \reg , #PSR_T_BIT        )
 #define _ASM_NOKPROBE(entry)
 #endif
 
+       .macro          __adldst_l, op, reg, sym, tmp, c
+       .if             __LINUX_ARM_ARCH__ < 7
+       ldr\c           \tmp, .La\@
+       .subsection     1
+       .align          2
+.La\@: .long           \sym - .Lpc\@
+       .previous
+       .else
+       .ifnb           \c
+ THUMB(        ittt            \c                      )
+       .endif
+       movw\c          \tmp, #:lower16:\sym - .Lpc\@
+       movt\c          \tmp, #:upper16:\sym - .Lpc\@
+       .endif
+
+#ifndef CONFIG_THUMB2_KERNEL
+       .set            .Lpc\@, . + 8                   // PC bias
+       .ifc            \op, add
+       add\c           \reg, \tmp, pc
+       .else
+       \op\c           \reg, [pc, \tmp]
+       .endif
+#else
+.Lb\@: add\c           \tmp, \tmp, pc
+       /*
+        * In Thumb-2 builds, the PC bias depends on whether we are currently
+        * emitting into a .arm or a .thumb section. The size of the add opcode
+        * above will be 2 bytes when emitting in Thumb mode and 4 bytes when
+        * emitting in ARM mode, so let's use this to account for the bias.
+        */
+       .set            .Lpc\@, . + (. - .Lb\@)
+
+       .ifnc           \op, add
+       \op\c           \reg, [\tmp]
+       .endif
+#endif
+       .endm
+
+       /*
+        * mov_l - move a constant value or [relocated] address into a register
+        */
+       .macro          mov_l, dst:req, imm:req
+       .if             __LINUX_ARM_ARCH__ < 7
+       ldr             \dst, =\imm
+       .else
+       movw            \dst, #:lower16:\imm
+       movt            \dst, #:upper16:\imm
+       .endif
+       .endm
+
+       /*
+        * adr_l - adr pseudo-op with unlimited range
+        *
+        * @dst: destination register
+        * @sym: name of the symbol
+        * @cond: conditional opcode suffix
+        */
+       .macro          adr_l, dst:req, sym:req, cond
+       __adldst_l      add, \dst, \sym, \dst, \cond
+       .endm
+
+       /*
+        * ldr_l - ldr <literal> pseudo-op with unlimited range
+        *
+        * @dst: destination register
+        * @sym: name of the symbol
+        * @cond: conditional opcode suffix
+        */
+       .macro          ldr_l, dst:req, sym:req, cond
+       __adldst_l      ldr, \dst, \sym, \dst, \cond
+       .endm
+
+       /*
+        * str_l - str <literal> pseudo-op with unlimited range
+        *
+        * @src: source register
+        * @sym: name of the symbol
+        * @tmp: mandatory scratch register
+        * @cond: conditional opcode suffix
+        */
+       .macro          str_l, src:req, sym:req, tmp:req, cond
+       __adldst_l      str, \src, \sym, \tmp, \cond
+       .endm
+
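
The \sym - .Lpc\@ expressions above store link-time-constant, place-relative offsets, so these macros resolve symbols at any load address without a virt-to-phys translation table. The same idea expressed in C (a hypothetical sketch, not from the patch):

#include <stdint.h>

/* A self-relative pointer: the stored word is target - &offset. */
struct rel_ptr { int32_t offset; };

static inline void *rel_deref(struct rel_ptr *p)
{
	return (char *)p + p->offset;	/* rebuild the absolute address */
}
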
 #endif /* __ASM_ASSEMBLER_H__ */
index 898e9c7..595e538 100644 (file)
  * assembly implementation with completely non standard calling convention
  * for arguments and results (beware).
  */
-
-#ifdef __ARMEB__
-#define __xh "r0"
-#define __xl "r1"
-#else
-#define __xl "r0"
-#define __xh "r1"
-#endif
-
 static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
 {
        register unsigned int __base      asm("r4") = base;
        register unsigned long long __n   asm("r0") = *n;
        register unsigned long long __res asm("r2");
-       register unsigned int __rem       asm(__xh);
-       asm(    __asmeq("%0", __xh)
+       unsigned int __rem;
+       asm(    __asmeq("%0", "r0")
                __asmeq("%1", "r2")
-               __asmeq("%2", "r0")
-               __asmeq("%3", "r4")
+               __asmeq("%2", "r4")
                "bl     __do_div64"
-               : "=r" (__rem), "=r" (__res)
-               : "r" (__n), "r" (__base)
+               : "+r" (__n), "=r" (__res)
+               : "r" (__base)
                : "ip", "lr", "cc");
+       __rem = __n >> 32;
        *n = __res;
        return __rem;
 }
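
The register juggling changes, but the C-level contract does not: the quotient is written back through n and the 32-bit remainder is returned, now extracted from the r0/r1 pair with a plain shift. A usage sketch, assuming the usual asm/div64.h semantics:

#include <stdint.h>

static void div64_usage_sketch(void)
{
	uint64_t n = 1000003;
	uint32_t rem = __div64_32(&n, 10);	/* n == 100000, rem == 3 */
	(void)rem;
}
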
index 61941f3..b8102a6 100644 (file)
@@ -51,6 +51,7 @@ typedef struct user_fp elf_fpregset_t;
 #define R_ARM_NONE             0
 #define R_ARM_PC24             1
 #define R_ARM_ABS32            2
+#define R_ARM_REL32            3
 #define R_ARM_CALL             28
 #define R_ARM_JUMP24           29
 #define R_ARM_TARGET1          38
@@ -58,11 +59,15 @@ typedef struct user_fp elf_fpregset_t;
 #define R_ARM_PREL31           42
 #define R_ARM_MOVW_ABS_NC      43
 #define R_ARM_MOVT_ABS         44
+#define R_ARM_MOVW_PREL_NC     45
+#define R_ARM_MOVT_PREL                46
 
 #define R_ARM_THM_CALL         10
 #define R_ARM_THM_JUMP24       30
 #define R_ARM_THM_MOVW_ABS_NC  47
 #define R_ARM_THM_MOVT_ABS     48
+#define R_ARM_THM_MOVW_PREL_NC 49
+#define R_ARM_THM_MOVT_PREL    50
 
 /*
  * These are used to set parameters in the core dumps.
index c279a8a..707068f 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _ASM_FIXMAP_H
 #define _ASM_FIXMAP_H
 
-#define FIXADDR_START          0xffc00000UL
+#define FIXADDR_START          0xffc80000UL
 #define FIXADDR_END            0xfff00000UL
 #define FIXADDR_TOP            (FIXADDR_END - PAGE_SIZE)
 
index ab2b654..fc74812 100644 (file)
@@ -441,7 +441,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
 #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
 extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
 extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
-extern int devmem_is_allowed(unsigned long pfn);
 #endif
 
 /*
diff --git a/arch/arm/include/asm/kasan.h b/arch/arm/include/asm/kasan.h
new file mode 100644 (file)
index 0000000..303c35d
--- /dev/null
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/arm/include/asm/kasan.h
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ */
+
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifdef CONFIG_KASAN
+
+#include <asm/kasan_def.h>
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+
+/*
+ * The compiler uses a shadow offset assuming that addresses start
+ * from 0. Kernel addresses don't start from 0, so shadow
+ * for kernel really starts from 'compiler's shadow offset' +
+ * ('kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT)
+ */
+
+asmlinkage void kasan_early_init(void);
+extern void kasan_init(void);
+
+#else
+static inline void kasan_init(void) { }
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/kasan_def.h b/arch/arm/include/asm/kasan_def.h
new file mode 100644 (file)
index 0000000..5739605
--- /dev/null
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  arch/arm/include/asm/kasan_def.h
+ *
+ *  Copyright (c) 2018 Huawei Technologies Co., Ltd.
+ *
+ *  Author: Abbott Liu <liuwenliang@huawei.com>
+ */
+
+#ifndef __ASM_KASAN_DEF_H
+#define __ASM_KASAN_DEF_H
+
+#ifdef CONFIG_KASAN
+
+/*
+ * Define KASAN_SHADOW_OFFSET, KASAN_SHADOW_START and KASAN_SHADOW_END for
+ * the Arm kernel address sanitizer. We are "stealing" lowmem (the 4 GB
+ * addressable by a 32-bit architecture) out of the virtual address
+ * space to use as shadow memory for KASan as follows:
+ *
+ * +----+ 0xffffffff
+ * |    |                                                      \
+ * |    | |-> Static kernel image (vmlinux) BSS and page table
+ * |    |/
+ * +----+ PAGE_OFFSET
+ * |    |                                                      \
+ * |    | |->  Loadable kernel modules virtual address space area
+ * |    |/
+ * +----+ MODULES_VADDR = KASAN_SHADOW_END
+ * |    |                                              \
+ * |    | |-> The shadow area of kernel virtual address.
+ * |    |/
+ * +----+->  TASK_SIZE (start of kernel space) = KASAN_SHADOW_START,
+ * |    |\   the shadow address of MODULES_VADDR
+ * |    | |
+ * |    | |
+ * |    | |-> The user space area in lowmem. The kernel address
+ * |    | |   sanitizer does not use this space, nor does it map it.
+ * |    | |
+ * |    | |
+ * |    | |
+ * |    | |
+ * |    |/
+ * ------ 0
+ *
+ * 1) KASAN_SHADOW_START
+ *   This value is the shadow address of MODULES_VADDR. It is the
+ *   start of kernel virtual space. Since we have modules to load, we need
+ *   to cover also that area with shadow memory so we can find memory
+ *   bugs in modules.
+ *
+ * 2) KASAN_SHADOW_END
+ *   This value is the shadow address of 0x100000000: the mapping that would
+ *   be after the end of the kernel memory at 0xffffffff. It is the end of
+ *   kernel address sanitizer shadow area. It is also the start of the
+ *   module area.
+ *
+ * 3) KASAN_SHADOW_OFFSET:
+ *   This value is used to map an address to the corresponding shadow
+ *   address by the following formula:
+ *
+ *     shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
+ *
+ *  As you would expect, >> 3 is equal to dividing by 8, meaning each
+ *  byte in the shadow memory covers 8 bytes of kernel memory, so one
+ *  bit of shadow memory is used per byte of kernel memory.
+ *
+ *  The KASAN_SHADOW_OFFSET is provided in a Kconfig option depending
+ *  on the VMSPLIT layout of the system: the kernel and userspace can
+ *  split up lowmem in different ways according to needs, so we calculate
+ *  the shadow offset depending on this.
+ */
+
+#define KASAN_SHADOW_SCALE_SHIFT       3
+#define KASAN_SHADOW_OFFSET    _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+#define KASAN_SHADOW_END       ((UL(1) << (32 - KASAN_SHADOW_SCALE_SHIFT)) \
+                                + KASAN_SHADOW_OFFSET)
+#define KASAN_SHADOW_START      ((KASAN_SHADOW_END >> 3) + KASAN_SHADOW_OFFSET)
+
+#endif
+#endif
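
Worked through for the default 3G/1G split (PAGE_OFFSET = 0xC0000000, so KASAN_SHADOW_OFFSET = 0x9f000000 per the Kconfig hunk above), the definitions line up as follows (a standalone sketch, not part of the patch):

#include <assert.h>

#define SCALE_SHIFT	3
#define SHADOW_OFFSET	0x9f000000UL

static unsigned long shadow_of(unsigned long addr)
{
	return (addr >> SCALE_SHIFT) + SHADOW_OFFSET;
}

int main(void)
{
	/* KASAN_SHADOW_END = (1 << (32 - 3)) + offset, i.e. MODULES_VADDR */
	unsigned long end = (1UL << (32 - SCALE_SHIFT)) + SHADOW_OFFSET;
	assert(end == 0xbf000000UL);

	/* KASAN_SHADOW_START = shadow of MODULES_VADDR, the new TASK_SIZE */
	assert(shadow_of(end) == 0xb6e00000UL);
	return 0;
}
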
index 99035b5..2f841cb 100644 (file)
@@ -18,6 +18,7 @@
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
 #endif
+#include <asm/kasan_def.h>
 
 /* PAGE_OFFSET - the virtual address of the start of the kernel image */
 #define PAGE_OFFSET            UL(CONFIG_PAGE_OFFSET)
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
  */
+#ifndef CONFIG_KASAN
 #define TASK_SIZE              (UL(CONFIG_PAGE_OFFSET) - UL(SZ_16M))
+#else
+#define TASK_SIZE              (KASAN_SHADOW_START)
+#endif
 #define TASK_UNMAPPED_BASE     ALIGN(TASK_SIZE / 3, SZ_16M)
 
 /*
  */
 #define XIP_VIRT_ADDR(physaddr)  (MODULES_VADDR + ((physaddr) & 0x000fffff))
 
+#define FDT_FIXED_BASE         UL(0xff800000)
+#define FDT_FIXED_SIZE         (2 * SECTION_SIZE)
+#define FDT_VIRT_BASE(physbase)        ((void *)(FDT_FIXED_BASE | (physbase) % SECTION_SIZE))
+
 #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
 /*
  * Allow 16MB-aligned ioremap pages
@@ -107,6 +116,7 @@ extern unsigned long vectors_base;
 #define MODULES_VADDR          PAGE_OFFSET
 
 #define XIP_VIRT_ADDR(physaddr)  (physaddr)
+#define FDT_VIRT_BASE(physbase)  ((void *)(physbase))
 
 #endif /* !CONFIG_MMU */
 
@@ -173,6 +183,7 @@ extern unsigned long vectors_base;
  * so that all we need to do is modify the 8-bit constant field.
  */
 #define __PV_BITS_31_24        0x81000000
+#define __PV_BITS_23_16        0x810000
 #define __PV_BITS_7_0  0x81
 
 extern unsigned long __pv_phys_pfn_offset;
@@ -183,43 +194,65 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define PHYS_OFFSET    ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
 #define PHYS_PFN_OFFSET        (__pv_phys_pfn_offset)
 
-#define __pv_stub(from,to,instr,type)                  \
+#ifndef CONFIG_THUMB2_KERNEL
+#define __pv_stub(from,to,instr)                       \
        __asm__("@ __pv_stub\n"                         \
        "1:     " instr "       %0, %1, %2\n"           \
+       "2:     " instr "       %0, %0, %3\n"           \
        "       .pushsection .pv_table,\"a\"\n"         \
-       "       .long   1b\n"                           \
+       "       .long   1b - ., 2b - .\n"               \
        "       .popsection\n"                          \
        : "=r" (to)                                     \
-       : "r" (from), "I" (type))
+       : "r" (from), "I" (__PV_BITS_31_24),            \
+         "I"(__PV_BITS_23_16))
 
-#define __pv_stub_mov_hi(t)                            \
-       __asm__ volatile("@ __pv_stub_mov\n"            \
-       "1:     mov     %R0, %1\n"                      \
+#define __pv_add_carry_stub(x, y)                      \
+       __asm__("@ __pv_add_carry_stub\n"               \
+       "0:     movw    %R0, #0\n"                      \
+       "       adds    %Q0, %1, %R0, lsl #20\n"        \
+       "1:     mov     %R0, %2\n"                      \
+       "       adc     %R0, %R0, #0\n"                 \
        "       .pushsection .pv_table,\"a\"\n"         \
-       "       .long   1b\n"                           \
+       "       .long   0b - ., 1b - .\n"               \
        "       .popsection\n"                          \
-       : "=r" (t)                                      \
-       : "I" (__PV_BITS_7_0))
+       : "=&r" (y)                                     \
+       : "r" (x), "I" (__PV_BITS_7_0)                  \
+       : "cc")
+
+#else
+#define __pv_stub(from,to,instr)                       \
+       __asm__("@ __pv_stub\n"                         \
+       "0:     movw    %0, #0\n"                       \
+       "       lsl     %0, #21\n"                      \
+       "       " instr " %0, %1, %0\n"                 \
+       "       .pushsection .pv_table,\"a\"\n"         \
+       "       .long   0b - .\n"                       \
+       "       .popsection\n"                          \
+       : "=&r" (to)                                    \
+       : "r" (from))
 
 #define __pv_add_carry_stub(x, y)                      \
-       __asm__ volatile("@ __pv_add_carry_stub\n"      \
-       "1:     adds    %Q0, %1, %2\n"                  \
+       __asm__("@ __pv_add_carry_stub\n"               \
+       "0:     movw    %R0, #0\n"                      \
+       "       lsls    %R0, #21\n"                     \
+       "       adds    %Q0, %1, %R0\n"                 \
+       "1:     mvn     %R0, #0\n"                      \
        "       adc     %R0, %R0, #0\n"                 \
        "       .pushsection .pv_table,\"a\"\n"         \
-       "       .long   1b\n"                           \
+       "       .long   0b - ., 1b - .\n"               \
        "       .popsection\n"                          \
-       : "+r" (y)                                      \
-       : "r" (x), "I" (__PV_BITS_31_24)                \
+       : "=&r" (y)                                     \
+       : "r" (x)                                       \
        : "cc")
+#endif
 
 static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
        phys_addr_t t;
 
        if (sizeof(phys_addr_t) == 4) {
-               __pv_stub(x, t, "add", __PV_BITS_31_24);
+               __pv_stub(x, t, "add");
        } else {
-               __pv_stub_mov_hi(t);
                __pv_add_carry_stub(x, t);
        }
        return t;
@@ -235,7 +268,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
         * assembler expression receives 32 bit argument
         * in place where 'r' 32 bit operand is expected.
         */
-       __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24);
+       __pv_stub((unsigned long) x, t, "sub");
        return t;
 }
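
Whatever encoding gets patched in, every stub reduces to adding or subtracting the boot-time delta between physical and virtual memory; conceptually (a sketch, with pv_offset standing in for the patched immediates):

static unsigned long pv_offset;	/* PHYS_OFFSET - PAGE_OFFSET, set at boot */

static inline unsigned long virt_to_phys_sketch(unsigned long va)
{
	return va + pv_offset;	/* the patched "add" stub */
}

static inline unsigned long phys_to_virt_sketch(unsigned long pa)
{
	return pa - pv_offset;	/* the patched "sub" stub */
}
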
 
index 15f4674..fdee1f0 100644 (file)
@@ -21,6 +21,7 @@
 #define _PAGE_KERNEL_TABLE     (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_KERNEL))
 
 #ifdef CONFIG_ARM_LPAE
+#define PGD_SIZE               (PTRS_PER_PGD * sizeof(pgd_t))
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
@@ -28,14 +29,19 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 }
 
 #else  /* !CONFIG_ARM_LPAE */
+#define PGD_SIZE               (PAGE_SIZE << 2)
 
 /*
  * Since we have only two-level page tables, these are trivial
  */
 #define pmd_alloc_one(mm,addr)         ({ BUG(); ((pmd_t *)2); })
 #define pmd_free(mm, pmd)              do { } while (0)
+#ifdef CONFIG_KASAN
+/* The KASan core unconditionally calls pud_populate() on all architectures */
+#define pud_populate(mm,pmd,pte)       do { } while (0)
+#else
 #define pud_populate(mm,pmd,pte)       BUG()
-
+#endif
 #endif /* CONFIG_ARM_LPAE */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
index baf7d02..70fe69b 100644 (file)
  * the pud: the pud entry is never bad, always exists, and can't be set or
  * cleared.
  */
-#define pud_none(pud)          (0)
-#define pud_bad(pud)           (0)
-#define pud_present(pud)       (1)
-#define pud_clear(pudp)                do { } while (0)
-#define set_pud(pud,pudp)      do { } while (0)
+static inline int pud_none(pud_t pud)
+{
+       return 0;
+}
+
+static inline int pud_bad(pud_t pud)
+{
+       return 0;
+}
+
+static inline int pud_present(pud_t pud)
+{
+       return 1;
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+}
+
+static inline void set_pud(pud_t *pudp, pud_t pud)
+{
+}
 
 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 {
index b924105..9e6b972 100644 (file)
@@ -96,7 +96,7 @@ unsigned long get_wchan(struct task_struct *p);
 #define __ALT_SMP_ASM(smp, up)                                         \
        "9998:  " smp "\n"                                              \
        "       .pushsection \".alt.smp.init\", \"a\"\n"                \
-       "       .long   9998b\n"                                        \
+       "       .long   9998b - .\n"                                    \
        "       " up "\n"                                               \
        "       .popsection\n"
 #else
index 1e36c40..402e3f3 100644 (file)
@@ -9,12 +9,12 @@
 
 #ifdef CONFIG_OF
 
-extern const struct machine_desc *setup_machine_fdt(unsigned int dt_phys);
+extern const struct machine_desc *setup_machine_fdt(void *dt_virt);
 extern void __init arm_dt_init_cpu_maps(void);
 
 #else /* CONFIG_OF */
 
-static inline const struct machine_desc *setup_machine_fdt(unsigned int dt_phys)
+static inline const struct machine_desc *setup_machine_fdt(void *dt_virt)
 {
        return NULL;
 }
index 111a1d8..6c607c6 100644 (file)
@@ -5,6 +5,9 @@
 /*
  * We don't do inline string functions, since the
  * optimised inline asm versions are not small.
+ *
+ * The __underscore versions of some functions are for KASan to be able
+ * to replace them with instrumented versions.
  */
 
 #define __HAVE_ARCH_STRRCHR
@@ -15,15 +18,18 @@ extern char * strchr(const char * s, int c);
 
 #define __HAVE_ARCH_MEMCPY
 extern void * memcpy(void *, const void *, __kernel_size_t);
+extern void *__memcpy(void *dest, const void *src, __kernel_size_t n);
 
 #define __HAVE_ARCH_MEMMOVE
 extern void * memmove(void *, const void *, __kernel_size_t);
+extern void *__memmove(void *dest, const void *src, __kernel_size_t n);
 
 #define __HAVE_ARCH_MEMCHR
 extern void * memchr(const void *, int, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMSET
 extern void * memset(void *, int, __kernel_size_t);
+extern void *__memset(void *s, int c, __kernel_size_t n);
 
 #define __HAVE_ARCH_MEMSET32
 extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
@@ -39,4 +45,24 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
        return __memset64(p, v, n * 8, v >> 32);
 }
 
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * must use non-instrumented versions of the mem*
+ * functions named __memcpy() etc. All such kernel code has
+ * been tagged with KASAN_SANITIZE_file.o = n, which means
+ * that the address sanitization argument isn't passed to the
+ * compiler, and __SANITIZE_ADDRESS__ is not set. As a result
+ * these defines kick in.
+ */
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
+
 #endif
index eb7ce27..70d4cbc 100644 (file)
 #include <asm/fpstate.h>
 #include <asm/page.h>
 
+#ifdef CONFIG_KASAN
+/*
+ * KASan uses a lot of extra stack space so the thread size order needs to
+ * be increased.
+ */
+#define THREAD_SIZE_ORDER      2
+#else
 #define THREAD_SIZE_ORDER      1
+#endif
 #define THREAD_SIZE            (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define THREAD_START_SP                (THREAD_SIZE - 8)
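
With 4 KiB pages this doubles the kernel stack from 8 KiB to 16 KiB to make room for KASan's redzones and instrumented frames. A quick compile-time check of the arithmetic (a sketch assuming 4 KiB pages, not kernel code):

#define PAGE_SIZE_4K	4096
_Static_assert((PAGE_SIZE_4K << 1) ==  8192, "order 1: non-KASan stack");
_Static_assert((PAGE_SIZE_4K << 2) == 16384, "order 2: KASan stack");
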
 
index 907571f..e6eb7a2 100644 (file)
@@ -85,7 +85,7 @@
         */
        .macro  uaccess_entry, tsk, tmp0, tmp1, tmp2, disable
        ldr     \tmp1, [\tsk, #TI_ADDR_LIMIT]
-       mov     \tmp2, #TASK_SIZE
+       ldr     \tmp2, =TASK_SIZE
        str     \tmp2, [\tsk, #TI_ADDR_LIMIT]
  DACR( mrc     p15, 0, \tmp0, c3, c0, 0)
  DACR( str     \tmp0, [sp, #SVC_DACR])
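
The mov had to become a literal-pool ldr because a KASan-enabled TASK_SIZE (KASAN_SHADOW_START, e.g. 0xb6e00000) is not an ARM modified immediate (an 8-bit value rotated right by an even amount), whereas the old 0xbf000000 is. A small checker for that encodability rule (a sketch, not from the patch):

#include <stdint.h>

static int is_arm_modified_imm(uint32_t v)
{
	for (int rot = 0; rot < 32; rot += 2) {
		/* rotate v left by rot: undoes an even right-rotation */
		uint32_t r = rot ? (v << rot) | (v >> (32 - rot)) : v;
		if (r <= 0xff)
			return 1;
	}
	return 0;
}

/* is_arm_modified_imm(0xbf000000) == 1: old TASK_SIZE, 'mov' assembles
 * is_arm_modified_imm(0xb6e00000) == 0: KASan TASK_SIZE, needs 'ldr'  */
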
index 09e67cb..ae295a3 100644 (file)
@@ -21,6 +21,9 @@ obj-y         := elf.o entry-common.o irq.o opcodes.o \
                   setup.o signal.o sigreturn_codes.o \
                   stacktrace.o sys_arm.o time.o traps.o
 
+KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_traps.o := n
+
 ifneq ($(CONFIG_ARM_UNWIND),y)
 obj-$(CONFIG_FRAME_POINTER)    += return_address.o
 endif
@@ -88,6 +91,7 @@ obj-$(CONFIG_PARAVIRT)        += paravirt.o
 head-y                 := head$(MMUEXT).o
 obj-$(CONFIG_DEBUG_LL) += debug.o
 obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
+obj-$(CONFIG_ARM_PATCH_PHYS_VIRT)      += phys2virt.o
 
 # This is executed very early using a temporary stack, before any memory
 # allocator or global data is available. Everything has to be allocated on the stack.
index 067e12e..f2819c2 100644 (file)
@@ -2,11 +2,11 @@
 void convert_to_tag_list(struct tag *tags);
 
 #ifdef CONFIG_ATAGS
-const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer,
+const struct machine_desc *setup_machine_tags(void *__atags_vaddr,
        unsigned int machine_nr);
 #else
 static inline const struct machine_desc * __init __noreturn
-setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
+setup_machine_tags(void *__atags_vaddr, unsigned int machine_nr)
 {
        early_print("no ATAGS support: can't continue\n");
        while (true);
index 6c12d9f..373b61f 100644 (file)
@@ -174,7 +174,7 @@ static void __init squash_mem_tags(struct tag *tag)
 }
 
 const struct machine_desc * __init
-setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
+setup_machine_tags(void *atags_vaddr, unsigned int machine_nr)
 {
        struct tag *tags = (struct tag *)&default_tags;
        const struct machine_desc *mdesc = NULL, *p;
@@ -195,8 +195,8 @@ setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
        if (!mdesc)
                return NULL;
 
-       if (__atags_pointer)
-               tags = phys_to_virt(__atags_pointer);
+       if (atags_vaddr)
+               tags = atags_vaddr;
        else if (mdesc->atag_offset)
                tags = (void *)(PAGE_OFFSET + mdesc->atag_offset);
 
index 7f0745a..28311dd 100644 (file)
@@ -203,12 +203,12 @@ static const void * __init arch_get_next_mach(const char *const **match)
 
 /**
  * setup_machine_fdt - Machine setup when a dtb was passed to the kernel
- * @dt_phys: physical address of dt blob
+ * @dt_virt: virtual address of dt blob
  *
  * If a dtb was passed to the kernel in r2, then use it to choose the
  * correct machine_desc and to setup the system.
  */
-const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
+const struct machine_desc * __init setup_machine_fdt(void *dt_virt)
 {
        const struct machine_desc *mdesc, *mdesc_best = NULL;
 
@@ -221,7 +221,7 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
        mdesc_best = &__mach_desc_GENERIC_DT;
 #endif
 
-       if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys)))
+       if (!dt_virt || !early_init_dt_verify(dt_virt))
                return NULL;
 
        mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach);
index 55a47df..0ea8529 100644 (file)
@@ -252,31 +252,10 @@ __und_svc:
 #else
        svc_entry
 #endif
-       @
-       @ call emulation code, which returns using r9 if it has emulated
-       @ the instruction, or the more conventional lr if we are to treat
-       @ this as a real undefined instruction
-       @
-       @  r0 - instruction
-       @
-#ifndef CONFIG_THUMB2_KERNEL
-       ldr     r0, [r4, #-4]
-#else
-       mov     r1, #2
-       ldrh    r0, [r4, #-2]                   @ Thumb instruction at LR - 2
-       cmp     r0, #0xe800                     @ 32-bit instruction if xx >= 0
-       blo     __und_svc_fault
-       ldrh    r9, [r4]                        @ bottom 16 bits
-       add     r4, r4, #2
-       str     r4, [sp, #S_PC]
-       orr     r0, r9, r0, lsl #16
-#endif
-       badr    r9, __und_svc_finish
-       mov     r2, r4
-       bl      call_fpe
 
        mov     r1, #4                          @ PC correction to apply
-__und_svc_fault:
+ THUMB(        tst     r5, #PSR_T_BIT          )       @ exception taken in Thumb mode?
+ THUMB(        movne   r1, #2                  )       @ if so, fix up PC correction
        mov     r0, sp                          @ struct pt_regs *regs
        bl      __und_fault
 
@@ -427,7 +406,8 @@ ENDPROC(__fiq_abt)
        @ if it was interrupted in a critical region.  Here we
        @ perform a quick test inline since it should be false
        @ 99.9999% of the time.  The rest is done out of line.
-       cmp     r4, #TASK_SIZE
+       ldr     r0, =TASK_SIZE
+       cmp     r4, r0
        blhs    kuser_cmpxchg64_fixup
 #endif
 #endif
index 77d1639..e0d7833 100644 (file)
@@ -50,7 +50,8 @@ __ret_fast_syscall:
  UNWIND(.cantunwind    )
        disable_irq_notrace                     @ disable interrupts
        ldr     r2, [tsk, #TI_ADDR_LIMIT]
-       cmp     r2, #TASK_SIZE
+       ldr     r1, =TASK_SIZE
+       cmp     r2, r1
        blne    addr_limit_check_failed
        ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
        movs    r1, r1, lsl #16
@@ -87,7 +88,8 @@ __ret_fast_syscall:
 #endif
        disable_irq_notrace                     @ disable interrupts
        ldr     r2, [tsk, #TI_ADDR_LIMIT]
-       cmp     r2, #TASK_SIZE
+       ldr     r1, =TASK_SIZE
+       cmp     r2, r1
        blne    addr_limit_check_failed
        ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
        movs    r1, r1, lsl #16
@@ -128,7 +130,8 @@ ret_slow_syscall:
        disable_irq_notrace                     @ disable interrupts
 ENTRY(ret_to_user_from_irq)
        ldr     r2, [tsk, #TI_ADDR_LIMIT]
-       cmp     r2, #TASK_SIZE
+       ldr     r1, =TASK_SIZE
+       cmp     r2, r1
        blne    addr_limit_check_failed
        ldr     r1, [tsk, #TI_FLAGS]
        movs    r1, r1, lsl #16
index 4a39828..29b2eda 100644 (file)
@@ -95,7 +95,7 @@ __mmap_switched:
  THUMB(        ldmia   r4!, {r0, r1, r2, r3} )
  THUMB(        mov     sp, r3 )
        sub     r2, r2, r1
-       bl      memcpy                          @ copy .data to RAM
+       bl      __memcpy                        @ copy .data to RAM
 #endif
 
    ARM(        ldmia   r4!, {r0, r1, sp} )
@@ -103,7 +103,7 @@ __mmap_switched:
  THUMB(        mov     sp, r3 )
        sub     r2, r1, r0
        mov     r1, #0
-       bl      memset                          @ clear .bss
+       bl      __memset                        @ clear .bss
 
        ldmia   r4, {r0, r1, r2, r3}
        str     r9, [r0]                        @ Save processor ID
@@ -111,6 +111,9 @@ __mmap_switched:
        str     r8, [r2]                        @ Save atags pointer
        cmp     r3, #0
        strne   r10, [r3]                       @ Save control register values
+#ifdef CONFIG_KASAN
+       bl      kasan_early_init
+#endif
        mov     lr, #0
        b       start_kernel
 ENDPROC(__mmap_switched)
@@ -170,11 +173,12 @@ ENDPROC(lookup_processor_type)
  *     r9 = cpuid (preserved)
  */
 __lookup_processor_type:
-       adr     r3, __lookup_processor_type_data
-       ldmia   r3, {r4 - r6}
-       sub     r3, r3, r4                      @ get offset between virt&phys
-       add     r5, r5, r3                      @ convert virt addresses to
-       add     r6, r6, r3                      @ physical address space
+       /*
+        * Look in <asm/procinfo.h> for information about the __proc_info
+        * structure.
+        */
+       adr_l   r5, __proc_info_begin
+       adr_l   r6, __proc_info_end
 1:     ldmia   r5, {r3, r4}                    @ value, mask
        and     r4, r4, r9                      @ mask wanted bits
        teq     r3, r4
@@ -186,17 +190,6 @@ __lookup_processor_type:
 2:     ret     lr
 ENDPROC(__lookup_processor_type)
 
-/*
- * Look in <asm/procinfo.h> for information about the __proc_info structure.
- */
-       .align  2
-       .type   __lookup_processor_type_data, %object
-__lookup_processor_type_data:
-       .long   .
-       .long   __proc_info_begin
-       .long   __proc_info_end
-       .size   __lookup_processor_type_data, . - __lookup_processor_type_data
-
 __error_lpae:
 #ifdef CONFIG_DEBUG_LL
        adr     r0, str_lpae
index f890422..7f62c5e 100644 (file)
@@ -103,10 +103,8 @@ ENTRY(stext)
 #endif
 
 #ifndef CONFIG_XIP_KERNEL
-       adr     r3, 2f
-       ldmia   r3, {r4, r8}
-       sub     r4, r3, r4                      @ (PHYS_OFFSET - PAGE_OFFSET)
-       add     r8, r8, r4                      @ PHYS_OFFSET
+       adr_l   r8, _text                       @ __pa(_text)
+       sub     r8, r8, #TEXT_OFFSET            @ PHYS_OFFSET
 #else
        ldr     r8, =PLAT_PHYS_OFFSET           @ always constant in this case
 #endif
@@ -158,10 +156,6 @@ ENTRY(stext)
 1:     b       __enable_mmu
 ENDPROC(stext)
        .ltorg
-#ifndef CONFIG_XIP_KERNEL
-2:     .long   .
-       .long   PAGE_OFFSET
-#endif
 
 /*
  * Setup the initial page tables.  We only setup the barest
@@ -224,11 +218,8 @@ __create_page_tables:
         * Create identity mapping to cater for __enable_mmu.
         * This identity mapping will be removed by paging_init().
         */
-       adr     r0, __turn_mmu_on_loc
-       ldmia   r0, {r3, r5, r6}
-       sub     r0, r0, r3                      @ virt->phys offset
-       add     r5, r5, r0                      @ phys __turn_mmu_on
-       add     r6, r6, r0                      @ phys __turn_mmu_on_end
+       adr_l   r5, __turn_mmu_on               @ _pa(__turn_mmu_on)
+       adr_l   r6, __turn_mmu_on_end           @ _pa(__turn_mmu_on_end)
        mov     r5, r5, lsr #SECTION_SHIFT
        mov     r6, r6, lsr #SECTION_SHIFT
 
@@ -274,11 +265,10 @@ __create_page_tables:
         * We map 2 sections in case the ATAGs/DTB crosses a section boundary.
         */
        mov     r0, r2, lsr #SECTION_SHIFT
-       movs    r0, r0, lsl #SECTION_SHIFT
-       subne   r3, r0, r8
-       addne   r3, r3, #PAGE_OFFSET
-       addne   r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
-       orrne   r6, r7, r0
+       cmp     r2, #0
+       ldrne   r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ORDER)
+       addne   r3, r3, r4
+       orrne   r6, r7, r0, lsl #SECTION_SHIFT
        strne   r6, [r3], #1 << PMD_ORDER
        addne   r6, r6, #1 << SECTION_SHIFT
        strne   r6, [r3]
@@ -351,11 +341,6 @@ __create_page_tables:
        ret     lr
 ENDPROC(__create_page_tables)
        .ltorg
-       .align
-__turn_mmu_on_loc:
-       .long   .
-       .long   __turn_mmu_on
-       .long   __turn_mmu_on_end
 
 #if defined(CONFIG_SMP)
        .text
@@ -391,10 +376,8 @@ ENTRY(secondary_startup)
        /*
         * Use the page tables supplied from  __cpu_up.
         */
-       adr     r4, __secondary_data
-       ldmia   r4, {r5, r7, r12}               @ address to jump to after
-       sub     lr, r4, r5                      @ mmu has been enabled
-       add     r3, r7, lr
+       adr_l   r3, secondary_data
+       mov_l   r12, __secondary_switched
        ldrd    r4, r5, [r3, #0]                @ get secondary_data.pgdir
 ARM_BE8(eor    r4, r4, r5)                     @ Swap r5 and r4 in BE:
 ARM_BE8(eor    r5, r4, r5)                     @ it can be done in 3 steps
@@ -409,22 +392,13 @@ ARM_BE8(eor       r4, r4, r5)                     @ without using a temp reg.
 ENDPROC(secondary_startup)
 ENDPROC(secondary_startup_arm)
 
-       /*
-        * r6  = &secondary_data
-        */
 ENTRY(__secondary_switched)
-       ldr     sp, [r7, #12]                   @ get secondary_data.stack
+       ldr_l   r7, secondary_data + 12         @ get secondary_data.stack
+       mov     sp, r7
        mov     fp, #0
        b       secondary_start_kernel
 ENDPROC(__secondary_switched)
 
-       .align
-
-       .type   __secondary_data, %object
-__secondary_data:
-       .long   .
-       .long   secondary_data
-       .long   __secondary_switched
 #endif /* defined(CONFIG_SMP) */
 
 
@@ -539,19 +513,11 @@ ARM_BE8(rev       r0, r0)                 @ byteswap if big endian
        retne   lr
 
 __fixup_smp_on_up:
-       adr     r0, 1f
-       ldmia   r0, {r3 - r5}
-       sub     r3, r0, r3
-       add     r4, r4, r3
-       add     r5, r5, r3
+       adr_l   r4, __smpalt_begin
+       adr_l   r5, __smpalt_end
        b       __do_fixup_smp_on_up
 ENDPROC(__fixup_smp)
 
-       .align
-1:     .word   .
-       .word   __smpalt_begin
-       .word   __smpalt_end
-
        .pushsection .data
        .align  2
        .globl  smp_on_up
@@ -565,14 +531,15 @@ smp_on_up:
 __do_fixup_smp_on_up:
        cmp     r4, r5
        reths   lr
-       ldmia   r4!, {r0, r6}
- ARM(  str     r6, [r0, r3]    )
- THUMB(        add     r0, r0, r3      )
+       ldmia   r4, {r0, r6}
+ ARM(  str     r6, [r0, r4]    )
+ THUMB(        add     r0, r0, r4      )
+       add     r4, r4, #8
 #ifdef __ARMEB__
  THUMB(        mov     r6, r6, ror #16 )       @ Convert word order for big-endian.
 #endif
  THUMB(        strh    r6, [r0], #2    )       @ For Thumb-2, store as two halfwords
- THUMB(        mov     r6, r6, lsr #16 )       @ to be robust against misaligned r3.
+ THUMB(        mov     r6, r6, lsr #16 )       @ to be robust against misaligned r0.
  THUMB(        strh    r6, [r0]        )
        b       __do_fixup_smp_on_up
 ENDPROC(__do_fixup_smp_on_up)
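
Each .alt.smp.init entry is now a pair of words: the place-relative location of the patch site, then the replacement opcode, which is why the old virt-to-phys delta in r3 could go away. The walk in C terms (a sketch of the ARM path; the Thumb-2 path stores halfwords):

#include <stdint.h>

struct smpalt {
	int32_t	 site_offset;	/* patch site minus address of this entry */
	uint32_t up_insn;	/* replacement instruction for UP */
};

static void fixup_smp_on_up_sketch(struct smpalt *p, struct smpalt *end)
{
	for (; p < end; p++) {
		uint32_t *site =
			(uint32_t *)((unsigned long)p + p->site_offset);
		*site = p->up_insn;	/* patch in the UP variant */
	}
}
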
@@ -581,151 +548,8 @@ ENTRY(fixup_smp)
        stmfd   sp!, {r4 - r6, lr}
        mov     r4, r0
        add     r5, r0, r1
-       mov     r3, #0
        bl      __do_fixup_smp_on_up
        ldmfd   sp!, {r4 - r6, pc}
 ENDPROC(fixup_smp)
 
-#ifdef __ARMEB__
-#define LOW_OFFSET     0x4
-#define HIGH_OFFSET    0x0
-#else
-#define LOW_OFFSET     0x0
-#define HIGH_OFFSET    0x4
-#endif
-
-#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
-
-/* __fixup_pv_table - patch the stub instructions with the delta between
- * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
- * can be expressed by an immediate shifter operand. The stub instruction
- * has a form of '(add|sub) rd, rn, #imm'.
- */
-       __HEAD
-__fixup_pv_table:
-       adr     r0, 1f
-       ldmia   r0, {r3-r7}
-       mvn     ip, #0
-       subs    r3, r0, r3      @ PHYS_OFFSET - PAGE_OFFSET
-       add     r4, r4, r3      @ adjust table start address
-       add     r5, r5, r3      @ adjust table end address
-       add     r6, r6, r3      @ adjust __pv_phys_pfn_offset address
-       add     r7, r7, r3      @ adjust __pv_offset address
-       mov     r0, r8, lsr #PAGE_SHIFT @ convert to PFN
-       str     r0, [r6]        @ save computed PHYS_OFFSET to __pv_phys_pfn_offset
-       strcc   ip, [r7, #HIGH_OFFSET]  @ save to __pv_offset high bits
-       mov     r6, r3, lsr #24 @ constant for add/sub instructions
-       teq     r3, r6, lsl #24 @ must be 16MiB aligned
-THUMB( it      ne              @ cross section branch )
-       bne     __error
-       str     r3, [r7, #LOW_OFFSET]   @ save to __pv_offset low bits
-       b       __fixup_a_pv_table
-ENDPROC(__fixup_pv_table)
-
-       .align
-1:     .long   .
-       .long   __pv_table_begin
-       .long   __pv_table_end
-2:     .long   __pv_phys_pfn_offset
-       .long   __pv_offset
-
-       .text
-__fixup_a_pv_table:
-       adr     r0, 3f
-       ldr     r6, [r0]
-       add     r6, r6, r3
-       ldr     r0, [r6, #HIGH_OFFSET]  @ pv_offset high word
-       ldr     r6, [r6, #LOW_OFFSET]   @ pv_offset low word
-       mov     r6, r6, lsr #24
-       cmn     r0, #1
-#ifdef CONFIG_THUMB2_KERNEL
-       moveq   r0, #0x200000   @ set bit 21, mov to mvn instruction
-       lsls    r6, #24
-       beq     2f
-       clz     r7, r6
-       lsr     r6, #24
-       lsl     r6, r7
-       bic     r6, #0x0080
-       lsrs    r7, #1
-       orrcs   r6, #0x0080
-       orr     r6, r6, r7, lsl #12
-       orr     r6, #0x4000
-       b       2f
-1:     add     r7, r3
-       ldrh    ip, [r7, #2]
-ARM_BE8(rev16  ip, ip)
-       tst     ip, #0x4000
-       and     ip, #0x8f00
-       orrne   ip, r6  @ mask in offset bits 31-24
-       orreq   ip, r0  @ mask in offset bits 7-0
-ARM_BE8(rev16  ip, ip)
-       strh    ip, [r7, #2]
-       bne     2f
-       ldrh    ip, [r7]
-ARM_BE8(rev16  ip, ip)
-       bic     ip, #0x20
-       orr     ip, ip, r0, lsr #16
-ARM_BE8(rev16  ip, ip)
-       strh    ip, [r7]
-2:     cmp     r4, r5
-       ldrcc   r7, [r4], #4    @ use branch for delay slot
-       bcc     1b
-       bx      lr
-#else
-#ifdef CONFIG_CPU_ENDIAN_BE8
-       moveq   r0, #0x00004000 @ set bit 22, mov to mvn instruction
-#else
-       moveq   r0, #0x400000   @ set bit 22, mov to mvn instruction
-#endif
-       b       2f
-1:     ldr     ip, [r7, r3]
-#ifdef CONFIG_CPU_ENDIAN_BE8
-       @ in BE8, we load data in BE, but instructions still in LE
-       bic     ip, ip, #0xff000000
-       tst     ip, #0x000f0000 @ check the rotation field
-       orrne   ip, ip, r6, lsl #24 @ mask in offset bits 31-24
-       biceq   ip, ip, #0x00004000 @ clear bit 22
-       orreq   ip, ip, r0      @ mask in offset bits 7-0
-#else
-       bic     ip, ip, #0x000000ff
-       tst     ip, #0xf00      @ check the rotation field
-       orrne   ip, ip, r6      @ mask in offset bits 31-24
-       biceq   ip, ip, #0x400000       @ clear bit 22
-       orreq   ip, ip, r0      @ mask in offset bits 7-0
-#endif
-       str     ip, [r7, r3]
-2:     cmp     r4, r5
-       ldrcc   r7, [r4], #4    @ use branch for delay slot
-       bcc     1b
-       ret     lr
-#endif
-ENDPROC(__fixup_a_pv_table)
-
-       .align
-3:     .long __pv_offset
-
-ENTRY(fixup_pv_table)
-       stmfd   sp!, {r4 - r7, lr}
-       mov     r3, #0                  @ no offset
-       mov     r4, r0                  @ r0 = table start
-       add     r5, r0, r1              @ r1 = table size
-       bl      __fixup_a_pv_table
-       ldmfd   sp!, {r4 - r7, pc}
-ENDPROC(fixup_pv_table)
-
-       .data
-       .align  2
-       .globl  __pv_phys_pfn_offset
-       .type   __pv_phys_pfn_offset, %object
-__pv_phys_pfn_offset:
-       .word   0
-       .size   __pv_phys_pfn_offset, . -__pv_phys_pfn_offset
-
-       .globl  __pv_offset
-       .type   __pv_offset, %object
-__pv_offset:
-       .quad   0
-       .size   __pv_offset, . -__pv_offset
-#endif
-
 #include "head-common.S"
index 26d8e03..b699b22 100644 (file)
@@ -24,41 +24,38 @@ ENTRY(__boot_cpu_mode)
 .text
 
        /*
-        * Save the primary CPU boot mode. Requires 3 scratch registers.
+        * Save the primary CPU boot mode. Requires 2 scratch registers.
         */
-       .macro  store_primary_cpu_mode  reg1, reg2, reg3
+       .macro  store_primary_cpu_mode  reg1, reg2
        mrs     \reg1, cpsr
        and     \reg1, \reg1, #MODE_MASK
-       adr     \reg2, .L__boot_cpu_mode_offset
-       ldr     \reg3, [\reg2]
-       str     \reg1, [\reg2, \reg3]
+       str_l   \reg1, __boot_cpu_mode, \reg2
        .endm
 
        /*
         * Compare the current mode with the one saved on the primary CPU.
         * If they don't match, record that fact. The Z bit indicates
         * if there's a match or not.
-        * Requires 3 additionnal scratch registers.
+        * Requires 2 additional scratch registers.
         */
-       .macro  compare_cpu_mode_with_primary mode, reg1, reg2, reg3
-       adr     \reg2, .L__boot_cpu_mode_offset
-       ldr     \reg3, [\reg2]
-       ldr     \reg1, [\reg2, \reg3]
+       .macro  compare_cpu_mode_with_primary mode, reg1, reg2
+       adr_l   \reg2, __boot_cpu_mode
+       ldr     \reg1, [\reg2]
        cmp     \mode, \reg1            @ matches primary CPU boot mode?
        orrne   \reg1, \reg1, #BOOT_CPU_MODE_MISMATCH
-       strne   \reg1, [\reg2, \reg3]   @ record what happened and give up
+       strne   \reg1, [\reg2]          @ record what happened and give up
        .endm
 
 #else  /* ZIMAGE */
 
-       .macro  store_primary_cpu_mode  reg1:req, reg2:req, reg3:req
+       .macro  store_primary_cpu_mode  reg1:req, reg2:req
        .endm
 
 /*
  * The zImage loader only runs on one CPU, so we don't bother with multi-CPU
  * consistency checking:
  */
-       .macro  compare_cpu_mode_with_primary mode, reg1, reg2, reg3
+       .macro  compare_cpu_mode_with_primary mode, reg1, reg2
        cmp     \mode, \mode
        .endm
 
@@ -73,7 +70,7 @@ ENTRY(__boot_cpu_mode)
  */
 @ Call this from the primary CPU
 ENTRY(__hyp_stub_install)
-       store_primary_cpu_mode  r4, r5, r6
+       store_primary_cpu_mode  r4, r5
 ENDPROC(__hyp_stub_install)
 
        @ fall through...
@@ -87,7 +84,7 @@ ENTRY(__hyp_stub_install_secondary)
         * If the secondary has booted with a different mode, give up
         * immediately.
         */
-       compare_cpu_mode_with_primary   r4, r5, r6, r7
+       compare_cpu_mode_with_primary   r4, r5, r6
        retne   lr
 
        /*
@@ -228,12 +225,6 @@ ENTRY(__hyp_soft_restart)
        ret     lr
 ENDPROC(__hyp_soft_restart)
 
-#ifndef ZIMAGE
-.align 2
-.L__boot_cpu_mode_offset:
-       .long   __boot_cpu_mode - .
-#endif
-
 .align 5
 ENTRY(__hyp_stub_vectors)
 __hyp_stub_reset:      W(b)    .
index 0dcae78..d2b4ac0 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 #include <asm/assembler.h>
+#include "iwmmxt.h"
 
 #if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B)
 #define PJ4(code...)           code
@@ -113,33 +114,33 @@ concan_save:
 
 concan_dump:
 
-       wstrw   wCSSF, [r1, #MMX_WCSSF]
-       wstrw   wCASF, [r1, #MMX_WCASF]
-       wstrw   wCGR0, [r1, #MMX_WCGR0]
-       wstrw   wCGR1, [r1, #MMX_WCGR1]
-       wstrw   wCGR2, [r1, #MMX_WCGR2]
-       wstrw   wCGR3, [r1, #MMX_WCGR3]
+       wstrw   wCSSF, r1, MMX_WCSSF
+       wstrw   wCASF, r1, MMX_WCASF
+       wstrw   wCGR0, r1, MMX_WCGR0
+       wstrw   wCGR1, r1, MMX_WCGR1
+       wstrw   wCGR2, r1, MMX_WCGR2
+       wstrw   wCGR3, r1, MMX_WCGR3
 
 1:     @ MUP? wRn
        tst     r2, #0x2
        beq     2f
 
-       wstrd   wR0,  [r1, #MMX_WR0]
-       wstrd   wR1,  [r1, #MMX_WR1]
-       wstrd   wR2,  [r1, #MMX_WR2]
-       wstrd   wR3,  [r1, #MMX_WR3]
-       wstrd   wR4,  [r1, #MMX_WR4]
-       wstrd   wR5,  [r1, #MMX_WR5]
-       wstrd   wR6,  [r1, #MMX_WR6]
-       wstrd   wR7,  [r1, #MMX_WR7]
-       wstrd   wR8,  [r1, #MMX_WR8]
-       wstrd   wR9,  [r1, #MMX_WR9]
-       wstrd   wR10, [r1, #MMX_WR10]
-       wstrd   wR11, [r1, #MMX_WR11]
-       wstrd   wR12, [r1, #MMX_WR12]
-       wstrd   wR13, [r1, #MMX_WR13]
-       wstrd   wR14, [r1, #MMX_WR14]
-       wstrd   wR15, [r1, #MMX_WR15]
+       wstrd   wR0,  r1, MMX_WR0
+       wstrd   wR1,  r1, MMX_WR1
+       wstrd   wR2,  r1, MMX_WR2
+       wstrd   wR3,  r1, MMX_WR3
+       wstrd   wR4,  r1, MMX_WR4
+       wstrd   wR5,  r1, MMX_WR5
+       wstrd   wR6,  r1, MMX_WR6
+       wstrd   wR7,  r1, MMX_WR7
+       wstrd   wR8,  r1, MMX_WR8
+       wstrd   wR9,  r1, MMX_WR9
+       wstrd   wR10, r1, MMX_WR10
+       wstrd   wR11, r1, MMX_WR11
+       wstrd   wR12, r1, MMX_WR12
+       wstrd   wR13, r1, MMX_WR13
+       wstrd   wR14, r1, MMX_WR14
+       wstrd   wR15, r1, MMX_WR15
 
 2:     teq     r0, #0                          @ anything to load?
        reteq   lr                              @ if not, return
@@ -147,30 +148,30 @@ concan_dump:
 concan_load:
 
        @ Load wRn
-       wldrd   wR0,  [r0, #MMX_WR0]
-       wldrd   wR1,  [r0, #MMX_WR1]
-       wldrd   wR2,  [r0, #MMX_WR2]
-       wldrd   wR3,  [r0, #MMX_WR3]
-       wldrd   wR4,  [r0, #MMX_WR4]
-       wldrd   wR5,  [r0, #MMX_WR5]
-       wldrd   wR6,  [r0, #MMX_WR6]
-       wldrd   wR7,  [r0, #MMX_WR7]
-       wldrd   wR8,  [r0, #MMX_WR8]
-       wldrd   wR9,  [r0, #MMX_WR9]
-       wldrd   wR10, [r0, #MMX_WR10]
-       wldrd   wR11, [r0, #MMX_WR11]
-       wldrd   wR12, [r0, #MMX_WR12]
-       wldrd   wR13, [r0, #MMX_WR13]
-       wldrd   wR14, [r0, #MMX_WR14]
-       wldrd   wR15, [r0, #MMX_WR15]
+       wldrd   wR0,  r0, MMX_WR0
+       wldrd   wR1,  r0, MMX_WR1
+       wldrd   wR2,  r0, MMX_WR2
+       wldrd   wR3,  r0, MMX_WR3
+       wldrd   wR4,  r0, MMX_WR4
+       wldrd   wR5,  r0, MMX_WR5
+       wldrd   wR6,  r0, MMX_WR6
+       wldrd   wR7,  r0, MMX_WR7
+       wldrd   wR8,  r0, MMX_WR8
+       wldrd   wR9,  r0, MMX_WR9
+       wldrd   wR10, r0, MMX_WR10
+       wldrd   wR11, r0, MMX_WR11
+       wldrd   wR12, r0, MMX_WR12
+       wldrd   wR13, r0, MMX_WR13
+       wldrd   wR14, r0, MMX_WR14
+       wldrd   wR15, r0, MMX_WR15
 
        @ Load wCx
-       wldrw   wCSSF, [r0, #MMX_WCSSF]
-       wldrw   wCASF, [r0, #MMX_WCASF]
-       wldrw   wCGR0, [r0, #MMX_WCGR0]
-       wldrw   wCGR1, [r0, #MMX_WCGR1]
-       wldrw   wCGR2, [r0, #MMX_WCGR2]
-       wldrw   wCGR3, [r0, #MMX_WCGR3]
+       wldrw   wCSSF, r0, MMX_WCSSF
+       wldrw   wCASF, r0, MMX_WCASF
+       wldrw   wCGR0, r0, MMX_WCGR0
+       wldrw   wCGR1, r0, MMX_WCGR1
+       wldrw   wCGR2, r0, MMX_WCGR2
+       wldrw   wCGR3, r0, MMX_WCGR3
 
        @ clear CUP/MUP (only if r1 != 0)
        teq     r1, #0
diff --git a/arch/arm/kernel/iwmmxt.h b/arch/arm/kernel/iwmmxt.h
new file mode 100644 (file)
index 0000000..fb62728
--- /dev/null
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __IWMMXT_H__
+#define __IWMMXT_H__
+
+.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+.set .LwR\b, \b
+.set .Lr\b, \b
+.endr
+
+.set .LwCSSF, 0x2
+.set .LwCASF, 0x3
+.set .LwCGR0, 0x8
+.set .LwCGR1, 0x9
+.set .LwCGR2, 0xa
+.set .LwCGR3, 0xb
+
+.macro wldrd, reg:req, base:req, offset:req
+.inst 0xedd00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
+.endm
+
+.macro wldrw, reg:req, base:req, offset:req
+.inst 0xfd900100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
+.endm
+
+.macro wstrd, reg:req, base:req, offset:req
+.inst 0xedc00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
+.endm
+
+.macro wstrw, reg:req, base:req, offset:req
+.inst 0xfd800100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
+.endm
+
+#ifdef __clang__
+
+#define wCon c1
+
+.macro tmrc, dest:req, control:req
+mrc p1, 0, \dest, \control, c0, 0
+.endm
+
+.macro tmcr, control:req, src:req
+mcr p1, 0, \src, \control, c0, 0
+.endm
+#endif
+
+#endif
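
Each macro simply ORs the register numbers and the scaled offset into a fixed base opcode, which is what lets the file assemble with toolchains (such as Clang's integrated assembler) that lack the Concan mnemonics. The same computation in C (a sketch):

#include <stdint.h>

/* wldrd wRn, rM, #off  ->  0xedd00100 | Rn << 12 | rM << 16 | off / 4 */
static uint32_t wldrd_encode(unsigned int wrn, unsigned int rm,
			     unsigned int off)
{
	return 0xedd00100u | (wrn << 12) | (rm << 16) | (off >> 2);
}
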
index e15444b..beac45e 100644 (file)
@@ -185,14 +185,24 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
                        *(u32 *)loc |= offset & 0x7fffffff;
                        break;
 
+               case R_ARM_REL32:
+                       *(u32 *)loc += sym->st_value - loc;
+                       break;
+
                case R_ARM_MOVW_ABS_NC:
                case R_ARM_MOVT_ABS:
+               case R_ARM_MOVW_PREL_NC:
+               case R_ARM_MOVT_PREL:
                        offset = tmp = __mem_to_opcode_arm(*(u32 *)loc);
                        offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
                        offset = (offset ^ 0x8000) - 0x8000;
 
                        offset += sym->st_value;
-                       if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
+                       if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL ||
+                           ELF32_R_TYPE(rel->r_info) == R_ARM_MOVW_PREL_NC)
+                               offset -= loc;
+                       if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS ||
+                           ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL)
                                offset >>= 16;
 
                        tmp &= 0xfff0f000;
@@ -283,6 +293,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 
                case R_ARM_THM_MOVW_ABS_NC:
                case R_ARM_THM_MOVT_ABS:
+               case R_ARM_THM_MOVW_PREL_NC:
+               case R_ARM_THM_MOVT_PREL:
                        upper = __mem_to_opcode_thumb16(*(u16 *)loc);
                        lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2));
 
@@ -302,7 +314,11 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
                        offset = (offset ^ 0x8000) - 0x8000;
                        offset += sym->st_value;
 
-                       if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
+                       if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL ||
+                           ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVW_PREL_NC)
+                               offset -= loc;
+                       if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS ||
+                           ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL)
                                offset >>= 16;
 
                        upper = (u16)((upper & 0xfbf0) |
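
Both the ARM and Thumb-2 branches share the same arithmetic: decode the split 16-bit immediate, sign-extend it, add the symbol value, subtract the place for the new _PREL variants, and keep the high half for MOVT. For the ARM encoding, roughly (a sketch; the final re-encode mirrors the tmp &= 0xfff0f000 context above):

#include <stdint.h>

static uint32_t reloc_movw_movt(uint32_t insn, uint32_t sym_value,
				uint32_t loc, int is_movt, int is_prel)
{
	int32_t offset = ((insn & 0xf0000) >> 4) | (insn & 0xfff);

	offset = (offset ^ 0x8000) - 0x8000;	/* sign-extend imm16 */
	offset += sym_value;
	if (is_prel)
		offset -= loc;			/* place-relative variants */
	if (is_movt)
		offset >>= 16;			/* MOVT takes bits 31:16 */

	insn &= 0xfff0f000;			/* clear imm4 and imm12 */
	return insn | (((uint32_t)offset & 0xf000) << 4)
		    | ((uint32_t)offset & 0xfff);
}
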
diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S
new file mode 100644 (file)
index 0000000..fb53db7
--- /dev/null
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  Copyright (C) 1994-2002 Russell King
+ *  Copyright (c) 2003, 2020 ARM Limited
+ *  All Rights Reserved
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/page.h>
+
+#ifdef __ARMEB__
+#define LOW_OFFSET     0x4
+#define HIGH_OFFSET    0x0
+#else
+#define LOW_OFFSET     0x0
+#define HIGH_OFFSET    0x4
+#endif
+
+/*
+ * __fixup_pv_table - patch the stub instructions with the delta between
+ *                    PHYS_OFFSET and PAGE_OFFSET, which is assumed to be
+ *                    2 MiB aligned.
+ *
+ * Called from head.S, which expects the following registers to be preserved:
+ *   r1 = machine no, r2 = atags or dtb,
+ *   r8 = phys_offset, r9 = cpuid, r10 = procinfo
+ */
+       __HEAD
+ENTRY(__fixup_pv_table)
+       mov     r0, r8, lsr #PAGE_SHIFT @ convert to PFN
+       str_l   r0, __pv_phys_pfn_offset, r3
+
+       adr_l   r0, __pv_offset
+       subs    r3, r8, #PAGE_OFFSET    @ PHYS_OFFSET - PAGE_OFFSET
+       mvn     ip, #0
+       strcc   ip, [r0, #HIGH_OFFSET]  @ save to __pv_offset high bits
+       str     r3, [r0, #LOW_OFFSET]   @ save to __pv_offset low bits
+
+       mov     r0, r3, lsr #21         @ constant for add/sub instructions
+       teq     r3, r0, lsl #21         @ must be 2 MiB aligned
+       bne     0f
+
+       adr_l   r4, __pv_table_begin
+       adr_l   r5, __pv_table_end
+       b       __fixup_a_pv_table
+
+0:     mov     r0, r0                  @ deadloop on error
+       b       0b
+ENDPROC(__fixup_pv_table)
+
+       .text
+__fixup_a_pv_table:
+       adr_l   r6, __pv_offset
+       ldr     r0, [r6, #HIGH_OFFSET]  @ pv_offset high word
+       ldr     r6, [r6, #LOW_OFFSET]   @ pv_offset low word
+       cmn     r0, #1
+#ifdef CONFIG_THUMB2_KERNEL
+       @
+       @ The Thumb-2 versions of the patchable sequences are
+       @
+       @ phys-to-virt:                 movw    <reg>, #offset<31:21>
+       @                               lsl     <reg>, #21
+       @                               sub     <VA>, <PA>, <reg>
+       @
+       @ virt-to-phys (non-LPAE):      movw    <reg>, #offset<31:21>
+       @                               lsl     <reg>, #21
+       @                               add     <PA>, <VA>, <reg>
+       @
+       @ virt-to-phys (LPAE):          movw    <reg>, #offset<31:21>
+       @                               lsl     <reg>, #21
+       @                               adds    <PAlo>, <VA>, <reg>
+       @                               mov     <PAhi>, #offset<39:32>
+       @                               adc     <PAhi>, <PAhi>, #0
+       @
+       @ In the non-LPAE case, all patchable instructions are MOVW
+       @ instructions, where we need to patch in the offset into the
+       @ second halfword of the opcode (the 16-bit immediate is encoded
+       @ as imm4:i:imm3:imm8)
+       @
+       @       15       11 10  9           4 3    0  15  14  12 11 8 7    0
+       @      +-----------+---+-------------+------++---+------+----+------+
+       @ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
+       @      +-----------+---+-------------+------++---+------+----+------+
+       @
+       @ In the LPAE case, we also need to patch in the high word of the
+       @ offset into the immediate field of the MOV instruction, or patch it
+       @ to a MVN instruction if the offset is negative. In this case, we
+       @ need to inspect the first halfword of the opcode, to check whether
+       @ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
+       @ needed. The encoding of the immediate is rather complex for values
+       @ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
+       @ order bits, which can be patched into imm8 directly (and i:imm3
+       @ cleared)
+       @
+       @      15       11 10  9        5         0  15  14  12 11 8 7    0
+       @     +-----------+---+---------------------++---+------+----+------+
+       @ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+       @ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+       @     +-----------+---+---------------------++---+------+----+------+
+       @
+       moveq   r0, #0x200000           @ set bit 21, mov to mvn instruction
+       lsrs    r3, r6, #29             @ isolate top 3 bits of displacement
+       ubfx    r6, r6, #21, #8         @ put bits 28:21 into the MOVW imm8 field
+       bfi     r6, r3, #12, #3         @ put bits 31:29 into the MOVW imm3 field
+       b       .Lnext
+.Lloop:        add     r7, r4
+       adds    r4, #4                  @ clears Z flag
+#ifdef CONFIG_ARM_LPAE
+       ldrh    ip, [r7]
+ARM_BE8(rev16  ip, ip)
+       tst     ip, #0x200              @ MOVW has bit 9 set, MVN has it clear
+       bne     0f                      @ skip to MOVW handling (Z flag is clear)
+       bic     ip, #0x20               @ clear bit 5 (MVN -> MOV)
+       orr     ip, ip, r0, lsr #16     @ MOV -> MVN if offset < 0
+ARM_BE8(rev16  ip, ip)
+       strh    ip, [r7]
+       @ Z flag is set
+0:
+#endif
+       ldrh    ip, [r7, #2]
+ARM_BE8(rev16  ip, ip)
+       and     ip, #0xf00              @ clear everything except Rd field
+       orreq   ip, r0                  @ Z flag set -> MOV/MVN -> patch in high bits
+       orrne   ip, r6                  @ Z flag clear -> MOVW -> patch in low bits
+ARM_BE8(rev16  ip, ip)
+       strh    ip, [r7, #2]
+#else
+#ifdef CONFIG_CPU_ENDIAN_BE8
+@ In BE8, data is loaded in BE, but instructions are still in LE
+#define PV_BIT24       0x00000001
+#define PV_IMM8_MASK   0xff000000
+#define PV_IMMR_MSB    0x00080000
+#else
+#define PV_BIT24       0x01000000
+#define PV_IMM8_MASK   0x000000ff
+#define PV_IMMR_MSB    0x00000800
+#endif
+
+       @
+       @ The ARM versions of the patchable sequences are
+       @
+       @ phys-to-virt:                 sub     <VA>, <PA>, #offset<31:24>, lsl #24
+       @                               sub     <VA>, <PA>, #offset<23:16>, lsl #16
+       @
+       @ virt-to-phys (non-LPAE):      add     <PA>, <VA>, #offset<31:24>, lsl #24
+       @                               add     <PA>, <VA>, #offset<23:16>, lsl #16
+       @
+       @ virt-to-phys (LPAE):          movw    <reg>, #offset<31:20>
+       @                               adds    <PAlo>, <VA>, <reg>, lsl #20
+       @                               mov     <PAhi>, #offset<39:32>
+       @                               adc     <PAhi>, <PAhi>, #0
+       @
+       @ In the non-LPAE case, all patchable instructions are ADD or SUB
+       @ instructions, where we need to patch in the offset into the
+       @ immediate field of the opcode, which is emitted with the correct
+       @ rotation value. (The effective value of the immediate is imm12<7:0>
+       @ rotated right by [2 * imm12<11:8>] bits)
+       @
+       @      31   28 27      23 22  20 19  16 15  12 11    0
+       @      +------+-----------------+------+------+-------+
+       @  ADD | cond | 0 0 1 0 1 0 0 0 |  Rn  |  Rd  | imm12 |
+       @  SUB | cond | 0 0 1 0 0 1 0 0 |  Rn  |  Rd  | imm12 |
+       @  MOV | cond | 0 0 1 1 1 0 1 0 |  Rn  |  Rd  | imm12 |
+       @  MVN | cond | 0 0 1 1 1 1 1 0 |  Rn  |  Rd  | imm12 |
+       @      +------+-----------------+------+------+-------+
+       @
+       @ In the LPAE case, we use a MOVW instruction to carry the low offset
+       @ word, and patch in the high word of the offset into the immediate
+       @ field of the subsequent MOV instruction, or patch it to a MVN
+       @ instruction if the offset is negative. We can distinguish MOVW
+       @ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
+       @ distinguished from MOV/MVN (all using the encodings above) using
+       @ bit 24.
+       @
+       @      31   28 27      23 22  20 19  16 15  12 11    0
+       @      +------+-----------------+------+------+-------+
+       @ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 |  Rd  | imm12 |
+       @      +------+-----------------+------+------+-------+
+       @
+       moveq   r0, #0x400000           @ set bit 22, mov to mvn instruction
+       mov     r3, r6, lsr #16         @ put offset bits 31-16 into r3
+       mov     r6, r6, lsr #24         @ put offset bits 31-24 into r6
+       and     r3, r3, #0xf0           @ only keep offset bits 23-20 in r3
+       b       .Lnext
+.Lloop:        ldr     ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+       tst     ip, #PV_BIT24           @ ADD/SUB have bit 24 clear
+       beq     1f
+ARM_BE8(rev    ip, ip)
+       tst     ip, #0xc00000           @ MOVW has bits 23:22 clear
+       bic     ip, ip, #0x400000       @ clear bit 22
+       bfc     ip, #0, #12             @ clear imm12 field of MOV[W] instruction
+       orreq   ip, ip, r6, lsl #4      @ MOVW -> mask in offset bits 31-24
+       orreq   ip, ip, r3, lsr #4      @ MOVW -> mask in offset bits 23-20
+       orrne   ip, ip, r0              @ MOV  -> mask in offset bits 7-0 (or bit 22)
+ARM_BE8(rev    ip, ip)
+       b       2f
+1:
+#endif
+       tst     ip, #PV_IMMR_MSB                @ rotation value >= 16 ?
+       bic     ip, ip, #PV_IMM8_MASK
+       orreq   ip, ip, r6 ARM_BE8(, lsl #24)   @ mask in offset bits 31-24
+       orrne   ip, ip, r3 ARM_BE8(, lsl #24)   @ mask in offset bits 23-20
+2:
+       str     ip, [r7, r4]
+       add     r4, r4, #4
+#endif
+
+.Lnext:
+       cmp     r4, r5
+       ldrcc   r7, [r4]                @ use branch for delay slot
+       bcc     .Lloop
+       ret     lr
+ENDPROC(__fixup_a_pv_table)
+
+ENTRY(fixup_pv_table)
+       stmfd   sp!, {r4 - r7, lr}
+       mov     r4, r0                  @ r0 = table start
+       add     r5, r0, r1              @ r1 = table size
+       bl      __fixup_a_pv_table
+       ldmfd   sp!, {r4 - r7, pc}
+ENDPROC(fixup_pv_table)
+
+       .data
+       .align  2
+       .globl  __pv_phys_pfn_offset
+       .type   __pv_phys_pfn_offset, %object
+__pv_phys_pfn_offset:
+       .word   0
+       .size   __pv_phys_pfn_offset, . -__pv_phys_pfn_offset
+
+       .globl  __pv_offset
+       .type   __pv_offset, %object
+__pv_offset:
+       .quad   0
+       .size   __pv_offset, . -__pv_offset
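
The comments in phys2virt.S describe how the 16-bit MOVW immediate is scattered across the two instruction halfwords as imm4:i:imm3:imm8. A small runnable sketch of that packing (userspace C; the base halfword values are an ordinary MOVW r0, #0 encoding, not tied to any kernel API):

#include <stdint.h>
#include <stdio.h>

/*
 * Pack a 16-bit immediate into the two halfwords of a Thumb-2 MOVW
 * instruction, following the imm4:i:imm3:imm8 layout documented in
 * the comments above. The halfwords are passed in with their
 * immediate fields zeroed.
 */
static void movw_pack(uint16_t imm, uint16_t *first, uint16_t *second)
{
        *first  |= (imm >> 12) & 0xf;           /* imm4 -> bits 3:0 */
        *first  |= ((imm >> 11) & 0x1) << 10;   /* i    -> bit 10 */
        *second |= ((imm >> 8) & 0x7) << 12;    /* imm3 -> bits 14:12 */
        *second |= imm & 0xff;                  /* imm8 -> bits 7:0 */
}

int main(void)
{
        uint16_t hw1 = 0xf240, hw2 = 0x0000;    /* movw r0, #0 */

        movw_pack(0xbeef, &hw1, &hw2);
        printf("%04x %04x\n", hw1, hw2);        /* prints f64b 60ef */
        return 0;
}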
index 3f65d0a..1a5edf5 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/of_platform.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
+#include <linux/libfdt.h>
 #include <linux/of_fdt.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
@@ -58,6 +59,7 @@
 #include <asm/unwind.h>
 #include <asm/memblock.h>
 #include <asm/virt.h>
+#include <asm/kasan.h>
 
 #include "atags.h"
 
@@ -763,7 +765,7 @@ int __init arm_add_memory(u64 start, u64 size)
 #ifndef CONFIG_PHYS_ADDR_T_64BIT
        if (aligned_start > ULONG_MAX) {
                pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n",
-                       (long long)start);
+                       start);
                return -EINVAL;
        }
 
@@ -1081,19 +1083,27 @@ void __init hyp_mode_check(void)
 
 void __init setup_arch(char **cmdline_p)
 {
-       const struct machine_desc *mdesc;
+       const struct machine_desc *mdesc = NULL;
+       void *atags_vaddr = NULL;
+
+       if (__atags_pointer)
+               atags_vaddr = FDT_VIRT_BASE(__atags_pointer);
 
        setup_processor();
-       mdesc = setup_machine_fdt(__atags_pointer);
+       if (atags_vaddr) {
+               mdesc = setup_machine_fdt(atags_vaddr);
+               if (mdesc)
+                       memblock_reserve(__atags_pointer,
+                                        fdt_totalsize(atags_vaddr));
+       }
        if (!mdesc)
-               mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type);
+               mdesc = setup_machine_tags(atags_vaddr, __machine_arch_type);
        if (!mdesc) {
                early_print("\nError: invalid dtb and unrecognized/unsupported machine ID\n");
                early_print("  r1=0x%08x, r2=0x%08x\n", __machine_arch_type,
                            __atags_pointer);
                if (__atags_pointer)
-                       early_print("  r2[]=%*ph\n", 16,
-                                   phys_to_virt(__atags_pointer));
+                       early_print("  r2[]=%*ph\n", 16, atags_vaddr);
                dump_machine_table();
        }
 
@@ -1126,7 +1136,7 @@ void __init setup_arch(char **cmdline_p)
        efi_init();
        /*
         * Make sure the calculation for lowmem/highmem is set appropriately
-        * before reserving/allocating any mmeory
+        * before reserving/allocating any memory
         */
        adjust_lowmem_bounds();
        arm_memblock_init(mdesc);
@@ -1136,6 +1146,7 @@ void __init setup_arch(char **cmdline_p)
        early_ioremap_reset();
 
        paging_init(mdesc);
+       kasan_init();
        request_standard_resources(mdesc);
 
        if (mdesc->restart)
index 5dc8b80..43077e1 100644 (file)
@@ -72,8 +72,9 @@ ENTRY(__cpu_suspend)
        ldr     r3, =sleep_save_sp
        stmfd   sp!, {r0, r1}           @ save suspend func arg and pointer
        ldr     r3, [r3, #SLEEP_SAVE_SP_VIRT]
-       ALT_SMP(ldr r0, =mpidr_hash)
+       ALT_SMP(W(nop))                 @ don't use adr_l inside ALT_SMP()
        ALT_UP_B(1f)
+       adr_l   r0, mpidr_hash
        /* This ldmia relies on the memory layout of the mpidr_hash struct */
        ldmia   r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts
        compute_mpidr_hash      r0, r6, r7, r8, r2, r1
@@ -147,9 +148,8 @@ no_hyp:
        mov     r1, #0
        ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
        ALT_UP_B(1f)
-       adr     r2, mpidr_hash_ptr
-       ldr     r3, [r2]
-       add     r2, r2, r3              @ r2 = struct mpidr_hash phys address
+       adr_l   r2, mpidr_hash          @ r2 = struct mpidr_hash phys address
+
        /*
         * This ldmia relies on the memory layout of the mpidr_hash
         * struct mpidr_hash.
@@ -157,10 +157,7 @@ no_hyp:
        ldmia   r2, { r3-r6 }   @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts
        compute_mpidr_hash      r1, r4, r5, r6, r0, r3
 1:
-       adr     r0, _sleep_save_sp
-       ldr     r2, [r0]
-       add     r0, r0, r2
-       ldr     r0, [r0, #SLEEP_SAVE_SP_PHYS]
+       ldr_l   r0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
        ldr     r0, [r0, r1, lsl #2]
 
        @ load phys pgd, stack, resume fn
@@ -177,12 +174,6 @@ ENDPROC(cpu_resume_arm)
 ENDPROC(cpu_resume_no_hyp)
 #endif
 
-       .align 2
-_sleep_save_sp:
-       .long   sleep_save_sp - .
-mpidr_hash_ptr:
-       .long   mpidr_hash - .                  @ mpidr_hash struct offset
-
        .data
        .align  2
        .type   sleep_save_sp, #object
index 48099c6..6ab2b0a 100644 (file)
@@ -524,14 +524,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 }
 
 static const char *ipi_types[NR_IPI] __tracepoint_string = {
-#define S(x,s) [x] = s
-       S(IPI_WAKEUP, "CPU wakeup interrupts"),
-       S(IPI_TIMER, "Timer broadcast interrupts"),
-       S(IPI_RESCHEDULE, "Rescheduling interrupts"),
-       S(IPI_CALL_FUNC, "Function call interrupts"),
-       S(IPI_CPU_STOP, "CPU stop interrupts"),
-       S(IPI_IRQ_WORK, "IRQ work interrupts"),
-       S(IPI_COMPLETION, "completion interrupts"),
+       [IPI_WAKEUP]            = "CPU wakeup interrupts",
+       [IPI_TIMER]             = "Timer broadcast interrupts",
+       [IPI_RESCHEDULE]        = "Rescheduling interrupts",
+       [IPI_CALL_FUNC]         = "Function call interrupts",
+       [IPI_CPU_STOP]          = "CPU stop interrupts",
+       [IPI_IRQ_WORK]          = "IRQ work interrupts",
+       [IPI_COMPLETION]        = "completion interrupts",
 };
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
index d2bd0df..59fdf25 100644 (file)
@@ -18,9 +18,6 @@
 #warning Your compiler does not have EABI support.
 #warning    ARM unwind is known to compile only with EABI compilers.
 #warning    Change compiler or disable ARM_UNWIND option.
-#elif (__GNUC__ == 4 && __GNUC_MINOR__ <= 2) && !defined(__clang__)
-#warning Your compiler is too buggy; it is known to not compile ARM unwind support.
-#warning    Change compiler or disable ARM_UNWIND option.
 #endif
 #endif /* __CHECKER__ */
 
@@ -236,7 +233,11 @@ static int unwind_pop_register(struct unwind_ctrl_block *ctrl,
                if (*vsp >= (unsigned long *)ctrl->sp_high)
                        return -URC_FAILURE;
 
-       ctrl->vrs[reg] = *(*vsp)++;
+       /* Use READ_ONCE_NOCHECK here to prevent this memory access
+        * from being tracked by KASAN.
+        */
+       ctrl->vrs[reg] = READ_ONCE_NOCHECK(*(*vsp));
+       (*vsp)++;
        return URC_OK;
 }
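
KASAN instruments ordinary loads, and a stack unwinder legitimately reads words (including redzones) on a stack it does not own, which is why the plain dereference above becomes READ_ONCE_NOCHECK. A hedged kernel-style sketch of the same pattern; the function and its parameters are hypothetical, only READ_ONCE_NOCHECK is the real interface:

#include <linux/compiler.h>

/*
 * Copy 'nregs' saved words from an unwound stack at *vsp without
 * tripping KASAN; reads of another frame's redzones are legitimate
 * here, so instrumentation must be bypassed.
 */
static void pop_saved_words(unsigned long *dst, unsigned long **vsp,
                            int nregs)
{
        int i;

        for (i = 0; i < nregs; i++) {
                dst[i] = READ_ONCE_NOCHECK(**vsp);
                (*vsp)++;
        }
}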
 
index 09a3331..e4caf48 100644 (file)
 
 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 
+ENTRY(__memcpy)
 ENTRY(mmiocpy)
-ENTRY(memcpy)
+WEAK(memcpy)
 
 #include "copy_template.S"
 
 ENDPROC(memcpy)
 ENDPROC(mmiocpy)
+ENDPROC(__memcpy)
index b50e577..6fecc12 100644 (file)
  * occurring in the opposite direction.
  */
 
-ENTRY(memmove)
+ENTRY(__memmove)
+WEAK(memmove)
        UNWIND( .fnstart                        )
 
                subs    ip, r0, r1
                cmphi   r2, ip
-               bls     memcpy
+               bls     __memcpy
 
                stmfd   sp!, {r0, r4, lr}
        UNWIND( .fnend                          )
@@ -222,3 +223,4 @@ ENTRY(memmove)
 18:            backward_copy_shift     push=24 pull=8
 
 ENDPROC(memmove)
+ENDPROC(__memmove)
index 6ca4535..9817cb2 100644 (file)
@@ -13,8 +13,9 @@
        .text
        .align  5
 
+ENTRY(__memset)
 ENTRY(mmioset)
-ENTRY(memset)
+WEAK(memset)
 UNWIND( .fnstart         )
        ands    r3, r0, #3              @ 1 unaligned?
        mov     ip, r0                  @ preserve r0 as return value
@@ -132,6 +133,7 @@ UNWIND( .fnstart            )
 UNWIND( .fnend   )
 ENDPROC(memset)
 ENDPROC(mmioset)
+ENDPROC(__memset)
 
 ENTRY(__memset32)
 UNWIND( .fnstart         )
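
The pattern in the three string routines above is the standard KASAN interposition trick: the assembly memcpy/memmove/memset become WEAK symbols, so an instrumented C wrapper can override them, validate both buffers, and then call the always-present __-prefixed strong symbol. A simplified, hedged sketch of the wrapper side; kasan_check_read/write() are the real helpers from <linux/kasan-checks.h>, while the real wrapper in mm/kasan is more involved:

#include <linux/kasan-checks.h>
#include <linux/string.h>

/* Provided by the renamed assembly routine above. */
extern void *__memcpy(void *dest, const void *src, size_t len);

/*
 * Overrides the WEAK assembly memcpy when KASAN is enabled: check
 * both ranges against the shadow, then do the uninstrumented copy.
 */
void *memcpy(void *dest, const void *src, size_t len)
{
        kasan_check_read(src, len);
        kasan_check_write(dest, len);

        return __memcpy(dest, src, len);
}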
index 53d51aa..8a43ed1 100644 (file)
@@ -297,6 +297,15 @@ static const struct s3c_adc_bat_thresh bat_lut_acin[] = {
        { .volt = 3841, .cur = 0, .level = 0},
 };
 
+static struct gpiod_lookup_table h1940_bat_gpio_table = {
+       .dev_id = "s3c-adc-battery",
+       .table = {
+               /* Charge status S3C2410_GPF(3) */
+               GPIO_LOOKUP("GPIOF", 3, "charge-status", GPIO_ACTIVE_LOW),
+               { },
+       },
+};
+
 static int h1940_bat_init(void)
 {
        int ret;
@@ -330,8 +339,6 @@ static struct s3c_adc_bat_pdata h1940_bat_cfg = {
        .exit = h1940_bat_exit,
        .enable_charger = h1940_enable_charger,
        .disable_charger = h1940_disable_charger,
-       .gpio_charge_finished = S3C2410_GPF(3),
-       .gpio_inverted = 1,
        .lut_noac = bat_lut_noac,
        .lut_noac_cnt = ARRAY_SIZE(bat_lut_noac),
        .lut_acin = bat_lut_acin,
@@ -720,6 +727,7 @@ static void __init h1940_init(void)
        s3c24xx_fb_set_platdata(&h1940_fb_info);
        gpiod_add_lookup_table(&h1940_mmc_gpio_table);
        gpiod_add_lookup_table(&h1940_audio_gpio_table);
+       gpiod_add_lookup_table(&h1940_bat_gpio_table);
        /* Configure the I2S pins (GPE0...GPE4) in correct mode */
        s3c_gpio_cfgall_range(S3C2410_GPE(0), 5, S3C_GPIO_SFN(2),
                              S3C_GPIO_PULL_NONE);
index b9758f0..6e19add 100644 (file)
@@ -206,6 +206,15 @@ static const struct s3c_adc_bat_thresh bat_lut_acin[] = {
        { .volt = 3820, .cur = 0, .level = 0},
 };
 
+static struct gpiod_lookup_table rx1950_bat_gpio_table = {
+       .dev_id = "s3c-adc-battery",
+       .table = {
+               /* Charge status S3C2410_GPF(3) */
+               GPIO_LOOKUP("GPIOF", 3, "charge-status", GPIO_ACTIVE_HIGH),
+               { },
+       },
+};
+
 static int rx1950_bat_init(void)
 {
        int ret;
@@ -331,7 +340,6 @@ static struct s3c_adc_bat_pdata rx1950_bat_cfg = {
        .exit = rx1950_bat_exit,
        .enable_charger = rx1950_enable_charger,
        .disable_charger = rx1950_disable_charger,
-       .gpio_charge_finished = S3C2410_GPF(3),
        .lut_noac = bat_lut_noac,
        .lut_noac_cnt = ARRAY_SIZE(bat_lut_noac),
        .lut_acin = bat_lut_acin,
@@ -840,6 +848,7 @@ static void __init rx1950_init_machine(void)
 
        pwm_add_table(rx1950_pwm_lookup, ARRAY_SIZE(rx1950_pwm_lookup));
        gpiod_add_lookup_table(&rx1950_audio_gpio_table);
+       gpiod_add_lookup_table(&rx1950_bat_gpio_table);
        /* Configure the I2S pins (GPE0...GPE4) in correct mode */
        s3c_gpio_cfgall_range(S3C2410_GPE(0), 5, S3C_GPIO_SFN(2),
                              S3C_GPIO_PULL_NONE);
index bd3a52f..d4e89a0 100644 (file)
@@ -98,6 +98,26 @@ static struct mcp_plat_data collie_mcp_data = {
        .codec_pdata    = &collie_ucb1x00_data,
 };
 
+/* Battery management GPIOs */
+static struct gpiod_lookup_table collie_battery_gpiod_table = {
+       /* the ucb1x00 is the device attached to the MCP codec mcp0 */
+       .dev_id = "ucb1x00",
+       .table = {
+               /* These are on the main GPIO block of the SA1100 */
+               GPIO_LOOKUP("gpio", COLLIE_GPIO_CO,
+                           "main battery full", GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP("gpio", COLLIE_GPIO_MAIN_BAT_LOW,
+                           "main battery low", GPIO_ACTIVE_HIGH),
+               /*
+                * This is GPIO 0 on the Scoop expander, which is registered
+                * from common/scoop.c with this gpio chip label.
+                */
+               GPIO_LOOKUP("sharp-scoop", 0,
+                           "main charge on", GPIO_ACTIVE_HIGH),
+               { },
+       },
+};
+
 static int collie_ir_startup(struct device *dev)
 {
        int rc = gpio_request(COLLIE_GPIO_IR_ON, "IrDA");
@@ -395,6 +415,7 @@ static void __init collie_init(void)
        platform_scoop_config = &collie_pcmcia_config;
 
        gpiod_add_lookup_table(&collie_power_gpiod_table);
+       gpiod_add_lookup_table(&collie_battery_gpiod_table);
 
        ret = platform_add_devices(devices, ARRAY_SIZE(devices));
        if (ret) {
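
All three board conversions above replace numeric GPIO platform data with gpiod_lookup_table entries, which lets the drivers move to the descriptor API and pushes line polarity (GPIO_ACTIVE_LOW vs GPIO_ACTIVE_HIGH) into gpiolib. A hedged consumer-side sketch of what a driver can then do; the probe function is hypothetical, the gpiod calls are the real API:

#include <linux/gpio/consumer.h>
#include <linux/platform_device.h>

static int bat_probe_sketch(struct platform_device *pdev)
{
        struct gpio_desc *charge;

        /* Matched against the "charge-status" con_id in the tables. */
        charge = devm_gpiod_get(&pdev->dev, "charge-status", GPIOD_IN);
        if (IS_ERR(charge))
                return PTR_ERR(charge);

        /*
         * gpiolib applies the table's active-low/high flag, so this
         * reads the same logical value on h1940 and rx1950 despite
         * their opposite wiring.
         */
        return gpiod_get_value(charge);
}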
index 65e4482..02692fb 100644 (file)
@@ -743,6 +743,7 @@ config SWP_EMULATE
 config CPU_BIG_ENDIAN
        bool "Build big-endian kernel"
        depends on ARCH_SUPPORTS_BIG_ENDIAN
+       depends on !LD_IS_LLD
        help
          Say Y if you plan on running a kernel in big-endian mode.
          Note that your board must be properly built and your board
index c4ce477..3510503 100644 (file)
@@ -7,6 +7,7 @@ obj-y                           := extable.o fault.o init.o iomap.o
 obj-y                          += dma-mapping$(MMUEXT).o
 obj-$(CONFIG_MMU)              += fault-armv.o flush.o idmap.o ioremap.o \
                                   mmap.o pgd.o mmu.o pageattr.o
+KASAN_SANITIZE_mmu.o           := n
 
 ifneq ($(CONFIG_MMU),y)
 obj-y                          += nommu.o
@@ -16,6 +17,7 @@ endif
 obj-$(CONFIG_ARM_PTDUMP_CORE)  += dump.o
 obj-$(CONFIG_ARM_PTDUMP_DEBUGFS)       += ptdump_debugfs.o
 obj-$(CONFIG_MODULES)          += proc-syms.o
+KASAN_SANITIZE_physaddr.o      := n
 obj-$(CONFIG_DEBUG_VIRTUAL)    += physaddr.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)   += alignment.o
@@ -110,3 +112,6 @@ obj-$(CONFIG_CACHE_L2X0_PMU)        += cache-l2x0-pmu.o
 obj-$(CONFIG_CACHE_XSC3L2)     += cache-xsc3l2.o
 obj-$(CONFIG_CACHE_TAUROS2)    += cache-tauros2.o
 obj-$(CONFIG_CACHE_UNIPHIER)   += cache-uniphier.o
+
+KASAN_SANITIZE_kasan_init.o    := n
+obj-$(CONFIG_KASAN)            += kasan_init.o
index db623d7..828a256 100644 (file)
@@ -223,7 +223,6 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
        if (mdesc->reserve)
                mdesc->reserve();
 
-       early_init_fdt_reserve_self();
        early_init_fdt_scan_reserved_mem();
 
        /* reserve memory for DMA contiguous allocations */
diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c
new file mode 100644 (file)
index 0000000..9c34804
--- /dev/null
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This file contains kasan initialization code for ARM.
+ *
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ */
+
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+#include <linux/start_kernel.h>
+#include <linux/pgtable.h>
+#include <asm/cputype.h>
+#include <asm/highmem.h>
+#include <asm/mach/map.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/procinfo.h>
+#include <asm/proc-fns.h>
+
+#include "mm.h"
+
+static pgd_t tmp_pgd_table[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+
+pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
+
+static __init void *kasan_alloc_block(size_t size)
+{
+       return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
+                                     MEMBLOCK_ALLOC_KASAN, NUMA_NO_NODE);
+}
+
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
+                                     unsigned long end, bool early)
+{
+       unsigned long next;
+       pte_t *ptep = pte_offset_kernel(pmdp, addr);
+
+       do {
+               pte_t entry;
+               void *p;
+
+               next = addr + PAGE_SIZE;
+
+               if (!early) {
+                       if (!pte_none(READ_ONCE(*ptep)))
+                               continue;
+
+                       p = kasan_alloc_block(PAGE_SIZE);
+                       if (!p) {
+                               panic("%s failed to allocate shadow page for address 0x%lx\n",
+                                     __func__, addr);
+                               return;
+                       }
+                       memset(p, KASAN_SHADOW_INIT, PAGE_SIZE);
+                       entry = pfn_pte(virt_to_pfn(p),
+                                       __pgprot(pgprot_val(PAGE_KERNEL)));
+               } else if (pte_none(READ_ONCE(*ptep))) {
+                       /*
+                        * The early shadow memory maps all KASan
+                        * accesses to one and the same scratch page,
+                        * "kasan_early_shadow_page", so that the
+                        * instrumentation keeps working until we can set
+                        * up the proper KASan shadow memory.
+                        */
+                       entry = pfn_pte(virt_to_pfn(kasan_early_shadow_page),
+                                       __pgprot(_L_PTE_DEFAULT | L_PTE_DIRTY | L_PTE_XN));
+               } else {
+                       /*
+                        * Early shadow mappings are PMD_SIZE aligned, so if the
+                        * first entry is already set, they must all be set.
+                        */
+                       return;
+               }
+
+               set_pte_at(&init_mm, addr, ptep, entry);
+       } while (ptep++, addr = next, addr != end);
+}
+
+/*
+ * The pmd (page middle directory) is only used on LPAE
+ */
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
+                                     unsigned long end, bool early)
+{
+       unsigned long next;
+       pmd_t *pmdp = pmd_offset(pudp, addr);
+
+       do {
+               if (pmd_none(*pmdp)) {
+                       /*
+                        * We attempt to allocate a shadow block for the PMDs
+                        * used by the PTEs for this address if it isn't already
+                        * allocated.
+                        */
+                       void *p = early ? kasan_early_shadow_pte :
+                               kasan_alloc_block(PAGE_SIZE);
+
+                       if (!p) {
+                               panic("%s failed to allocate shadow block for address 0x%lx\n",
+                                     __func__, addr);
+                               return;
+                       }
+                       pmd_populate_kernel(&init_mm, pmdp, p);
+                       flush_pmd_entry(pmdp);
+               }
+
+               next = pmd_addr_end(addr, end);
+               kasan_pte_populate(pmdp, addr, next, early);
+       } while (pmdp++, addr = next, addr != end);
+}
+
+static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
+                                     bool early)
+{
+       unsigned long next;
+       pgd_t *pgdp;
+       p4d_t *p4dp;
+       pud_t *pudp;
+
+       pgdp = pgd_offset_k(addr);
+
+       do {
+               /*
+                * Allocate and populate the shadow block for this pgd entry
+                * (p4d and pud are folded on ARM32) if it doesn't exist yet.
+                */
+               if (!early && pgd_none(*pgdp)) {
+                       void *p = kasan_alloc_block(PAGE_SIZE);
+
+                       if (!p) {
+                               panic("%s failed to allocate shadow block for address 0x%lx\n",
+                                     __func__, addr);
+                               return;
+                       }
+                       pgd_populate(&init_mm, pgdp, p);
+               }
+
+               next = pgd_addr_end(addr, end);
+               /*
+                * We just immediately jump over the p4d and pud page
+                * directories since we believe ARM32 will never gain four-
+                * or five-level page tables.
+                */
+               p4dp = p4d_offset(pgdp, addr);
+               pudp = pud_offset(p4dp, addr);
+
+               kasan_pmd_populate(pudp, addr, next, early);
+       } while (pgdp++, addr = next, addr != end);
+}
+
+extern struct proc_info_list *lookup_processor_type(unsigned int);
+
+void __init kasan_early_init(void)
+{
+       struct proc_info_list *list;
+
+       /*
+        * locate processor in the list of supported processor
+        * types.  The linker builds this table for us from the
+        * entries in arch/arm/mm/proc-*.S
+        */
+       list = lookup_processor_type(read_cpuid_id());
+       if (list) {
+#ifdef MULTI_CPU
+               processor = *list->proc;
+#endif
+       }
+
+       BUILD_BUG_ON((KASAN_SHADOW_END - (1UL << 29)) != KASAN_SHADOW_OFFSET);
+       /*
+        * We walk the page table and set all of the shadow memory to point
+        * to the scratch page.
+        */
+       kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+}
+
+static void __init clear_pgds(unsigned long start,
+                       unsigned long end)
+{
+       for (; start && start < end; start += PMD_SIZE)
+               pmd_clear(pmd_off_k(start));
+}
+
+static int __init create_mapping(void *start, void *end)
+{
+       void *shadow_start, *shadow_end;
+
+       shadow_start = kasan_mem_to_shadow(start);
+       shadow_end = kasan_mem_to_shadow(end);
+
+       pr_info("Mapping kernel virtual memory block: %px-%px at shadow: %px-%px\n",
+               start, end, shadow_start, shadow_end);
+
+       kasan_pgd_populate((unsigned long)shadow_start & PAGE_MASK,
+                          PAGE_ALIGN((unsigned long)shadow_end), false);
+       return 0;
+}
+
+void __init kasan_init(void)
+{
+       phys_addr_t pa_start, pa_end;
+       u64 i;
+
+       /*
+        * We are going to perform proper setup of shadow memory.
+        *
+        * First we unmap the early shadow (see the clear_pgds() call below).
+        * However, instrumented code can't execute without shadow memory.
+        *
+        * To keep the early shadow memory MMU tables around while setting up
+        * the proper shadow memory, we copy swapper_pg_dir (the initial page
+        * table) to tmp_pgd_table and use that to keep the early shadow memory
+        * mapped until the full shadow setup is finished. Then we swap back
+        * to the proper swapper_pg_dir.
+        */
+
+       memcpy(tmp_pgd_table, swapper_pg_dir, sizeof(tmp_pgd_table));
+#ifdef CONFIG_ARM_LPAE
+       /* We need to be in the same PGD or this won't work */
+       BUILD_BUG_ON(pgd_index(KASAN_SHADOW_START) !=
+                    pgd_index(KASAN_SHADOW_END));
+       memcpy(tmp_pmd_table,
+              pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_START)),
+              sizeof(tmp_pmd_table));
+       set_pgd(&tmp_pgd_table[pgd_index(KASAN_SHADOW_START)],
+               __pgd(__pa(tmp_pmd_table) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
+#endif
+       cpu_switch_mm(tmp_pgd_table, &init_mm);
+       local_flush_tlb_all();
+
+       clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+       kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
+                                   kasan_mem_to_shadow((void *)-1UL) + 1);
+
+       for_each_mem_range(i, &pa_start, &pa_end) {
+               void *start = __va(pa_start);
+               void *end = __va(pa_end);
+
+               /* Do not attempt to shadow highmem */
+               if (pa_start >= arm_lowmem_limit) {
+                       pr_info("Skip highmem block at %pa-%pa\n", &pa_start, &pa_end);
+                       continue;
+               }
+               if (pa_end > arm_lowmem_limit) {
+                       pr_info("Truncating shadow for memory block at %pa-%pa to lowmem region at %pa\n",
+                               &pa_start, &pa_end, &arm_lowmem_limit);
+                       end = __va(arm_lowmem_limit);
+               }
+               if (start >= end) {
+                       pr_info("Skipping invalid memory block %pa-%pa (virtual %p-%p)\n",
+                               &pa_start, &pa_end, start, end);
+                       continue;
+               }
+
+               create_mapping(start, end);
+       }
+
+       /*
+        * 1. The module global variables are in MODULES_VADDR ~ MODULES_END,
+        *    so we need to map this area.
+        * 2. The shadow of PKMAP_BASE ~ PKMAP_BASE+PMD_SIZE and the shadow
+        *    of MODULES_VADDR ~ MODULES_END lie in the same PMD_SIZE region,
+        *    so we can't use kasan_populate_zero_shadow.
+        */
+       create_mapping((void *)MODULES_VADDR, (void *)(PKMAP_BASE + PMD_SIZE));
+
+       /*
+        * KASan may reuse the contents of kasan_early_shadow_pte directly, so
+        * we should make sure that it maps the zero page read-only.
+        */
+       for (i = 0; i < PTRS_PER_PTE; i++)
+               set_pte_at(&init_mm, KASAN_SHADOW_START + i*PAGE_SIZE,
+                          &kasan_early_shadow_pte[i],
+                          pfn_pte(virt_to_pfn(kasan_early_shadow_page),
+                               __pgprot(pgprot_val(PAGE_KERNEL)
+                                        | L_PTE_RDONLY)));
+
+       cpu_switch_mm(swapper_pg_dir, &init_mm);
+       local_flush_tlb_all();
+
+       memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+       pr_info("Kernel address sanitizer initialized\n");
+       init_task.kasan_depth = 0;
+}
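
The BUILD_BUG_ON in kasan_early_init() pins down the shadow arithmetic: with generic KASAN's 8-byte granule (scale shift 3), the shadow of the whole 32-bit address space is 1 << 29 bytes (512 MiB), and KASAN_SHADOW_OFFSET must equal KASAN_SHADOW_END minus that, so that shadow(addr) = (addr >> 3) + offset maps the top of memory exactly to KASAN_SHADOW_END. A runnable check under example values; the 0xbf000000 end address is only an illustration:

#include <stdint.h>
#include <stdio.h>

#define KASAN_SHADOW_SCALE_SHIFT 3      /* 8-byte granules, generic KASAN */

int main(void)
{
        uint32_t shadow_end = 0xbf000000;               /* example value */
        uint32_t offset = shadow_end - (1u << 29);      /* BUILD_BUG_ON rule */
        uint32_t addr = 0xc0000000;                     /* a lowmem address */

        printf("shadow(%08x) = %08x\n", addr,
               (addr >> KASAN_SHADOW_SCALE_SHIFT) + offset);
        /* The shadow of the last byte lands right below shadow_end. */
        printf("shadow(ffffffff) + 1 = %08x\n",
               (0xffffffffu >> KASAN_SHADOW_SCALE_SHIFT) + offset + 1);
        return 0;
}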
index b8d912a..a0f8a0c 100644 (file)
@@ -165,25 +165,3 @@ int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
 {
        return (pfn + (size >> PAGE_SHIFT)) <= (1 + (PHYS_MASK >> PAGE_SHIFT));
 }
-
-#ifdef CONFIG_STRICT_DEVMEM
-
-#include <linux/ioport.h>
-
-/*
- * devmem_is_allowed() checks to see if /dev/mem access to a certain
- * address is valid. The argument is a physical page number.
- * We mimic x86 here by disallowing access to system RAM as well as
- * device-exclusive MMIO regions. This effectively disable read()/write()
- * on /dev/mem.
- */
-int devmem_is_allowed(unsigned long pfn)
-{
-       if (iomem_is_exclusive(pfn << PAGE_SHIFT))
-               return 0;
-       if (!page_is_ram(pfn))
-               return 1;
-       return 0;
-}
-
-#endif
index ab69250..c06ebfb 100644 (file)
@@ -29,6 +29,7 @@
 #include <asm/procinfo.h>
 #include <asm/memory.h>
 #include <asm/pgalloc.h>
+#include <asm/kasan_def.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -39,6 +40,8 @@
 #include "mm.h"
 #include "tcm.h"
 
+extern unsigned long __atags_pointer;
+
 /*
  * empty_zero_page is a special page that is used for
  * zero-initialized data and COW.
@@ -946,7 +949,7 @@ static void __init create_mapping(struct map_desc *md)
                return;
        }
 
-       if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
+       if (md->type == MT_DEVICE &&
            md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START &&
            (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
                pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
@@ -1253,8 +1256,25 @@ static inline void prepare_page_table(void)
        /*
         * Clear out all the mappings below the kernel image.
         */
+#ifdef CONFIG_KASAN
+       /*
+        * KASan's shadow memory inserts itself between TASK_SIZE
+        * and MODULES_VADDR. Do not clear the KASan shadow memory mappings.
+        */
+       for (addr = 0; addr < KASAN_SHADOW_START; addr += PMD_SIZE)
+               pmd_clear(pmd_off_k(addr));
+       /*
+        * Skip over the KASan shadow area. KASAN_SHADOW_END is sometimes
+        * equal to MODULES_VADDR, in which case this loop clears nothing.
+        * With a Thumb-compiled kernel there will be 8 MB more to clear,
+        * as the KASan shadow always starts 16 MB below MODULES_VADDR.
+        */
+       for (addr = KASAN_SHADOW_END; addr < MODULES_VADDR; addr += PMD_SIZE)
+               pmd_clear(pmd_off_k(addr));
+#else
        for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
                pmd_clear(pmd_off_k(addr));
+#endif
 
 #ifdef CONFIG_XIP_KERNEL
        /* The XIP kernel is mapped in the module area -- skip over it */
@@ -1333,6 +1353,15 @@ static void __init devicemaps_init(const struct machine_desc *mdesc)
        for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE)
                pmd_clear(pmd_off_k(addr));
 
+       if (__atags_pointer) {
+               /* create a read-only mapping of the device tree */
+               map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK);
+               map.virtual = FDT_FIXED_BASE;
+               map.length = FDT_FIXED_SIZE;
+               map.type = MT_ROM;
+               create_mapping(&map);
+       }
+
        /*
         * Map the kernel if it is XIP.
         * It is always first in the modulearea.
@@ -1489,8 +1518,7 @@ static void __init map_lowmem(void)
 }
 
 #ifdef CONFIG_ARM_PV_FIXUP
-extern unsigned long __atags_pointer;
-typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata);
+typedef void pgtables_remap(long long offset, unsigned long pgd);
 pgtables_remap lpae_pgtables_remap_asm;
 
 /*
@@ -1503,7 +1531,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
        unsigned long pa_pgd;
        unsigned int cr, ttbcr;
        long long offset;
-       void *boot_data;
 
        if (!mdesc->pv_fixup)
                return;
@@ -1520,7 +1547,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
         */
        lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm);
        pa_pgd = __pa(swapper_pg_dir);
-       boot_data = __va(__atags_pointer);
        barrier();
 
        pr_info("Switching physical address space to 0x%08llx\n",
@@ -1556,7 +1582,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
         * needs to be assembly.  It's fairly simple, as we're using the
         * temporary tables setup by the initial assembly code.
         */
-       lpae_pgtables_remap(offset, pa_pgd, boot_data);
+       lpae_pgtables_remap(offset, pa_pgd);
 
        /* Re-enable the caches and cacheable TLB walks */
        asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr));
index c5e1b27..f8e9bc5 100644 (file)
@@ -66,7 +66,21 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
        new_pmd = pmd_alloc(mm, new_pud, 0);
        if (!new_pmd)
                goto no_pmd;
-#endif
+#ifdef CONFIG_KASAN
+       /*
+        * Copy PMD table for KASAN shadow mappings.
+        */
+       init_pgd = pgd_offset_k(TASK_SIZE);
+       init_p4d = p4d_offset(init_pgd, TASK_SIZE);
+       init_pud = pud_offset(init_p4d, TASK_SIZE);
+       init_pmd = pmd_offset(init_pud, TASK_SIZE);
+       new_pmd = pmd_offset(new_pud, TASK_SIZE);
+       memcpy(new_pmd, init_pmd,
+              (pmd_index(MODULES_VADDR) - pmd_index(TASK_SIZE))
+              * sizeof(pmd_t));
+       clean_dcache_area(new_pmd, PTRS_PER_PMD * sizeof(pmd_t));
+#endif /* CONFIG_KASAN */
+#endif /* CONFIG_LPAE */
 
        if (!vectors_high()) {
                /*
index 8eade04..5c5e195 100644 (file)
@@ -39,8 +39,8 @@ ENTRY(lpae_pgtables_remap_asm)
 
        /* Update level 2 entries for the boot data */
        add     r7, r2, #0x1000
-       add     r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER
-       bic     r7, r7, #(1 << L2_ORDER) - 1
+       movw    r3, #FDT_FIXED_BASE >> (SECTION_SHIFT - L2_ORDER)
+       add     r7, r7, r3
        ldrd    r4, r5, [r7]
        adds    r4, r4, r0
        adc     r5, r5, r1
index d056a54..20e1170 100644 (file)
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
+441    common  epoll_pwait2                    sys_epoll_pwait2
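
Syscall 441 wires up epoll_pwait2, which takes a struct timespec timeout instead of epoll_pwait's millisecond int. A hedged userspace sketch via raw syscall(2); the number is taken from the table above, and glibc may not provide a wrapper yet:

#include <stdio.h>
#include <sys/epoll.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#ifndef __NR_epoll_pwait2
#define __NR_epoll_pwait2 441           /* from the table above */
#endif

int main(void)
{
        int epfd = epoll_create1(0);
        struct epoll_event ev;
        struct timespec ts = { .tv_sec = 0, .tv_nsec = 1500000 };
        long n;

        /* args: epfd, events, maxevents, timeout, sigmask, sigsetsize */
        n = syscall(__NR_epoll_pwait2, epfd, &ev, 1, &ts, NULL, 0);
        printf("epoll_pwait2 returned %ld\n", n);       /* 0 on timeout */
        return 0;
}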
index 150ce6e..b558bee 100644 (file)
@@ -42,6 +42,8 @@ GCOV_PROFILE := n
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
 KCOV_INSTRUMENT := n
 
+KASAN_SANITIZE := n
+
 # Force dependency
 $(obj)/vdso.o : $(obj)/vdso.so
 
index 0186cf9..27b0a1f 100644 (file)
@@ -37,20 +37,3 @@ ENDPROC(vfp_null_entry)
        .align  2
 .LCvfp:
        .word   vfp_vector
-
-@ This code is called if the VFP does not exist. It needs to flag the
-@ failure to the VFP initialisation code.
-
-       __INIT
-ENTRY(vfp_testing_entry)
-       dec_preempt_count_ti r10, r4
-       ldr     r0, VFP_arch_address
-       str     r0, [r0]                @ set to non-zero value
-       ret     r9                      @ we have handled the fault
-ENDPROC(vfp_testing_entry)
-
-       .align  2
-VFP_arch_address:
-       .word   VFP_arch
-
-       __FINIT
index 4fcff9f..d5837bf 100644 (file)
@@ -79,11 +79,6 @@ ENTRY(vfp_support_entry)
        DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10
 
        .fpu    vfpv2
-       ldr     r3, [sp, #S_PSR]        @ Neither lazy restore nor FP exceptions
-       and     r3, r3, #MODE_MASK      @ are supported in kernel mode
-       teq     r3, #USR_MODE
-       bne     vfp_kmode_exception     @ Returns through lr
-
        VFPFMRX r1, FPEXC               @ Is the VFP enabled?
        DBGSTR1 "fpexc %08x", r1
        tst     r1, #FPEXC_EN
index 8c9e7f9..2cb355c 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/cputype.h>
 #include <asm/system_info.h>
 #include <asm/thread_notify.h>
+#include <asm/traps.h>
 #include <asm/vfp.h>
 
 #include "vfpinstr.h"
@@ -31,7 +32,6 @@
 /*
  * Our undef handlers (in entry.S)
  */
-asmlinkage void vfp_testing_entry(void);
 asmlinkage void vfp_support_entry(void);
 asmlinkage void vfp_null_entry(void);
 
@@ -42,7 +42,7 @@ asmlinkage void (*vfp_vector)(void) = vfp_null_entry;
  * Used in startup: set to non-zero if VFP checks fail
  * After startup, holds VFP architecture
  */
-unsigned int VFP_arch;
+static unsigned int __initdata VFP_arch;
 
 /*
  * The pointer to the vfpstate structure of the thread which currently
@@ -436,7 +436,7 @@ static void vfp_enable(void *unused)
  * present on all CPUs within a SMP complex. Needs to be called prior to
  * vfp_init().
  */
-void vfp_disable(void)
+void __init vfp_disable(void)
 {
        if (VFP_arch) {
                pr_debug("%s: should be called prior to vfp_init\n", __func__);
@@ -642,7 +642,9 @@ static int vfp_starting_cpu(unsigned int unused)
        return 0;
 }
 
-void vfp_kmode_exception(void)
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
 {
        /*
         * If we reach this point, a floating point exception has been raised
@@ -660,9 +662,51 @@ void vfp_kmode_exception(void)
                pr_crit("BUG: unsupported FP instruction in kernel mode\n");
        else
                pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n");
+       pr_crit("FPEXC == 0x%08x\n", fmrx(FPEXC));
+       return 1;
 }
 
-#ifdef CONFIG_KERNEL_MODE_NEON
+static struct undef_hook vfp_kmode_exception_hook[] = {{
+       .instr_mask     = 0xfe000000,
+       .instr_val      = 0xf2000000,
+       .cpsr_mask      = MODE_MASK | PSR_T_BIT,
+       .cpsr_val       = SVC_MODE,
+       .fn             = vfp_kmode_exception,
+}, {
+       .instr_mask     = 0xff100000,
+       .instr_val      = 0xf4000000,
+       .cpsr_mask      = MODE_MASK | PSR_T_BIT,
+       .cpsr_val       = SVC_MODE,
+       .fn             = vfp_kmode_exception,
+}, {
+       .instr_mask     = 0xef000000,
+       .instr_val      = 0xef000000,
+       .cpsr_mask      = MODE_MASK | PSR_T_BIT,
+       .cpsr_val       = SVC_MODE | PSR_T_BIT,
+       .fn             = vfp_kmode_exception,
+}, {
+       .instr_mask     = 0xff100000,
+       .instr_val      = 0xf9000000,
+       .cpsr_mask      = MODE_MASK | PSR_T_BIT,
+       .cpsr_val       = SVC_MODE | PSR_T_BIT,
+       .fn             = vfp_kmode_exception,
+}, {
+       .instr_mask     = 0x0c000e00,
+       .instr_val      = 0x0c000a00,
+       .cpsr_mask      = MODE_MASK,
+       .cpsr_val       = SVC_MODE,
+       .fn             = vfp_kmode_exception,
+}};
+
+static int __init vfp_kmode_exception_hook_init(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++)
+               register_undef_hook(&vfp_kmode_exception_hook[i]);
+       return 0;
+}
+subsys_initcall(vfp_kmode_exception_hook_init);
 
 /*
  * Kernel-side NEON support functions
@@ -708,6 +752,21 @@ EXPORT_SYMBOL(kernel_neon_end);
 
 #endif /* CONFIG_KERNEL_MODE_NEON */
 
+static int __init vfp_detect(struct pt_regs *regs, unsigned int instr)
+{
+       VFP_arch = UINT_MAX;    /* mark as not present */
+       regs->ARM_pc += 4;
+       return 0;
+}
+
+static struct undef_hook vfp_detect_hook __initdata = {
+       .instr_mask     = 0x0c000e00,
+       .instr_val      = 0x0c000a00,
+       .cpsr_mask      = MODE_MASK,
+       .cpsr_val       = SVC_MODE,
+       .fn             = vfp_detect,
+};
+
 /*
  * VFP support code initialisation.
  */
@@ -728,10 +787,11 @@ static int __init vfp_init(void)
         * The handler is already setup to just log calls, so
         * we just need to read the VFPSID register.
         */
-       vfp_vector = vfp_testing_entry;
+       register_undef_hook(&vfp_detect_hook);
        barrier();
        vfpsid = fmrx(FPSID);
        barrier();
+       unregister_undef_hook(&vfp_detect_hook);
        vfp_vector = vfp_null_entry;
 
        pr_info("VFP support v0.3: ");
index 9f0139b..d0d94f7 100644 (file)
@@ -13,7 +13,6 @@ config ARM64
        select ARCH_BINFMT_ELF_STATE
        select ARCH_HAS_DEBUG_VIRTUAL
        select ARCH_HAS_DEBUG_VM_PGTABLE
-       select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_DMA_PREP_COHERENT
        select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
        select ARCH_HAS_FAST_MULTIPLIER
@@ -113,6 +112,7 @@ config ARM64
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
        select GENERIC_IRQ_SHOW_LEVEL
+       select GENERIC_LIB_DEVMEM_IS_ALLOWED
        select GENERIC_PCI_IOMAP
        select GENERIC_PTDUMP
        select GENERIC_SCHED_CLOCK
@@ -137,6 +137,7 @@ config ARM64
        select HAVE_ARCH_JUMP_LABEL_RELATIVE
        select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
        select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
+       select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_MMAP_RND_BITS
        select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
@@ -334,7 +335,7 @@ config BROKEN_GAS_INST
 
 config KASAN_SHADOW_OFFSET
        hex
-       depends on KASAN
+       depends on KASAN_GENERIC || KASAN_SW_TAGS
        default 0xdfff800000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS
        default 0xdfffc00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS
        default 0xdffffe0000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS
@@ -1571,6 +1572,9 @@ endmenu
 
 menu "ARMv8.5 architectural features"
 
+config AS_HAS_ARMV8_5
+       def_bool $(cc-option,-Wa$(comma)-march=armv8.5-a)
+
 config ARM64_BTI
        bool "Branch Target Identification support"
        default y
@@ -1645,6 +1649,9 @@ config ARM64_MTE
        bool "Memory Tagging Extension support"
        default y
        depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI
+       depends on AS_HAS_ARMV8_5
+       # Required for tag checking in the uaccess routines
+       depends on ARM64_PAN
        select ARCH_USES_HIGH_VMA_FLAGS
        help
          Memory Tagging (part of the ARMv8.5 Extensions) provides
index 6a87d59..6be9b37 100644 (file)
@@ -96,6 +96,11 @@ ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
 asm-arch := armv8.4-a
 endif
 
+ifeq ($(CONFIG_AS_HAS_ARMV8_5), y)
+# make sure to pass the newest target architecture to -march.
+asm-arch := armv8.5-a
+endif
+
 ifdef asm-arch
 KBUILD_CFLAGS  += -Wa,-march=$(asm-arch) \
                   -DARM64_ASM_ARCH='"$(asm-arch)"'
@@ -132,7 +137,7 @@ head-y              := arch/arm64/kernel/head.o
 
 ifeq ($(CONFIG_KASAN_SW_TAGS), y)
 KASAN_SHADOW_SCALE_SHIFT := 4
-else
+else ifeq ($(CONFIG_KASAN_GENERIC), y)
 KASAN_SHADOW_SCALE_SHIFT := 3
 endif
 
index ddbe6bf..bf125c5 100644 (file)
@@ -473,7 +473,7 @@ USER(\label, ic     ivau, \tmp2)                    // invalidate I line PoU
 #define NOKPROBE(x)
 #endif
 
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 #define EXPORT_SYMBOL_NOKASAN(name)
 #else
 #define EXPORT_SYMBOL_NOKASAN(name)    EXPORT_SYMBOL(name)
index 63d43b5..77cbbe3 100644 (file)
@@ -6,6 +6,7 @@
 #define __ASM_CACHE_H
 
 #include <asm/cputype.h>
+#include <asm/mte-kasan.h>
 
 #define CTR_L1IP_SHIFT         14
 #define CTR_L1IP_MASK          3
@@ -51,6 +52,8 @@
 
 #ifdef CONFIG_KASAN_SW_TAGS
 #define ARCH_SLAB_MINALIGN     (1ULL << KASAN_SHADOW_SCALE_SHIFT)
+#elif defined(CONFIG_KASAN_HW_TAGS)
+#define ARCH_SLAB_MINALIGN     MTE_GRANULE_SIZE
 #endif
 
 #ifndef __ASSEMBLY__
index a7242ef..b77d997 100644 (file)
@@ -19,7 +19,7 @@
 #define ARM64_HAS_VIRT_HOST_EXTN               11
 #define ARM64_WORKAROUND_CAVIUM_27456          12
 #define ARM64_HAS_32BIT_EL0                    13
-#define ARM64_HARDEN_EL2_VECTORS               14
+#define ARM64_SPECTRE_V3A                      14
 #define ARM64_HAS_CNP                          15
 #define ARM64_HAS_NO_FPSIMD                    16
 #define ARM64_WORKAROUND_REPEAT_TLBI           17
@@ -65,7 +65,8 @@
 #define ARM64_MTE                              57
 #define ARM64_WORKAROUND_1508412               58
 #define ARM64_HAS_LDAPR                                59
+#define ARM64_KVM_PROTECTED_MODE               60
 
-#define ARM64_NCAPS                            60
+#define ARM64_NCAPS                            61
 
 #endif /* __ASM_CPUCAPS_H */
index 1c406e8..9a55580 100644 (file)
@@ -705,6 +705,11 @@ static inline bool system_supports_generic_auth(void)
                cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH);
 }
 
+static inline bool system_has_full_ptr_auth(void)
+{
+       return system_supports_address_auth() && system_supports_generic_auth();
+}
+
 static __always_inline bool system_uses_irq_prio_masking(void)
 {
        return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) &&
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
new file mode 100644 (file)
index 0000000..a7f5a1b
--- /dev/null
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM_KVM_INIT_H__
+#define __ARM_KVM_INIT_H__
+
+#ifndef __ASSEMBLY__
+#error Assembly-only header
+#endif
+
+#include <asm/kvm_arm.h>
+#include <asm/ptrace.h>
+#include <asm/sysreg.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+.macro __init_el2_sctlr
+       mov_q   x0, INIT_SCTLR_EL2_MMU_OFF
+       msr     sctlr_el2, x0
+       isb
+.endm
+
+/*
+ * Allow Non-secure EL1 and EL0 to access physical timer and counter.
+ * This is not necessary for VHE, since the host kernel runs in EL2,
+ * and EL0 accesses are configured at a later stage of the boot process.
+ * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
+ * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
+ * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
+ * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
+ * EL2.
+ */
+.macro __init_el2_timers mode
+.ifeqs "\mode", "nvhe"
+       mrs     x0, cnthctl_el2
+       orr     x0, x0, #3                      // Enable EL1 physical timers
+       msr     cnthctl_el2, x0
+.endif
+       msr     cntvoff_el2, xzr                // Clear virtual offset
+.endm
+
+.macro __init_el2_debug mode
+       mrs     x1, id_aa64dfr0_el1
+       sbfx    x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
+       cmp     x0, #1
+       b.lt    1f                              // Skip if no PMU present
+       mrs     x0, pmcr_el0                    // Disable debug access traps
+       ubfx    x0, x0, #11, #5                 // to EL2 and allow access to
+1:
+       csel    x2, xzr, x0, lt                 // all PMU counters from EL1
+
+       /* Statistical profiling */
+       ubfx    x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
+       cbz     x0, 3f                          // Skip if SPE not present
+
+.ifeqs "\mode", "nvhe"
+       mrs_s   x0, SYS_PMBIDR_EL1              // If SPE available at EL2,
+       and     x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
+       cbnz    x0, 2f                          // then permit sampling of physical
+       mov     x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
+                     1 << SYS_PMSCR_EL2_PA_SHIFT)
+       msr_s   SYS_PMSCR_EL2, x0               // addresses and physical counter
+2:
+       mov     x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
+       orr     x2, x2, x0                      // If we don't have VHE, then
+                                               // use EL1&0 translation.
+.else
+       orr     x2, x2, #MDCR_EL2_TPMS          // For VHE, use EL2 translation
+                                               // and disable access from EL1
+.endif
+
+3:
+       msr     mdcr_el2, x2                    // Configure debug traps
+.endm
+
+/* LORegions */
+.macro __init_el2_lor
+       mrs     x1, id_aa64mmfr1_el1
+       ubfx    x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
+       cbz     x0, 1f
+       msr_s   SYS_LORC_EL1, xzr
+1:
+.endm
+
+/* Stage-2 translation */
+.macro __init_el2_stage2
+       msr     vttbr_el2, xzr
+.endm
+
+/* GICv3 system register access */
+.macro __init_el2_gicv3
+       mrs     x0, id_aa64pfr0_el1
+       ubfx    x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
+       cbz     x0, 1f
+
+       mrs_s   x0, SYS_ICC_SRE_EL2
+       orr     x0, x0, #ICC_SRE_EL2_SRE        // Set ICC_SRE_EL2.SRE==1
+       orr     x0, x0, #ICC_SRE_EL2_ENABLE     // Set ICC_SRE_EL2.Enable==1
+       msr_s   SYS_ICC_SRE_EL2, x0
+       isb                                     // Make sure SRE is now set
+       mrs_s   x0, SYS_ICC_SRE_EL2             // Read SRE back,
+       tbz     x0, #0, 1f                      // and check that it sticks
+       msr_s   SYS_ICH_HCR_EL2, xzr            // Reset ICC_HCR_EL2 to defaults
+1:
+.endm
+
+.macro __init_el2_hstr
+       msr     hstr_el2, xzr                   // Disable CP15 traps to EL2
+.endm
+
+/* Virtual CPU ID registers */
+.macro __init_el2_nvhe_idregs
+       mrs     x0, midr_el1
+       mrs     x1, mpidr_el1
+       msr     vpidr_el2, x0
+       msr     vmpidr_el2, x1
+.endm
+
+/* Coprocessor traps */
+.macro __init_el2_nvhe_cptr
+       mov     x0, #0x33ff
+       msr     cptr_el2, x0                    // Disable copro. traps to EL2
+.endm
+
+/* SVE register access */
+.macro __init_el2_nvhe_sve
+       mrs     x1, id_aa64pfr0_el1
+       ubfx    x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
+       cbz     x1, 1f
+
+       bic     x0, x0, #CPTR_EL2_TZ            // Also disable SVE traps
+       msr     cptr_el2, x0                    // Disable copro. traps to EL2
+       isb
+       mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector
+       msr_s   SYS_ZCR_EL2, x1                 // length for EL1.
+1:
+.endm
+
+.macro __init_el2_nvhe_prepare_eret
+       mov     x0, #INIT_PSTATE_EL1
+       msr     spsr_el2, x0
+.endm
+
+/*
+ * Initialize EL2 registers to sane values. This should be called early on all
+ * cores that were booted in EL2.
+ *
+ * Regs: x0, x1 and x2 are clobbered.
+ */
+.macro init_el2_state mode
+.ifnes "\mode", "vhe"
+.ifnes "\mode", "nvhe"
+.error "Invalid 'mode' argument"
+.endif
+.endif
+
+       __init_el2_sctlr
+       __init_el2_timers \mode
+       __init_el2_debug \mode
+       __init_el2_lor
+       __init_el2_stage2
+       __init_el2_gicv3
+       __init_el2_hstr
+
+       /*
+        * When VHE is not in use, early init of EL2 needs to be done here.
+        * When VHE _is_ in use, EL1 will not be used in the host and
+        * requires no configuration, and all non-hyp-specific EL2 setup
+        * will be done via the _EL1 system register aliases in __cpu_setup.
+        */
+.ifeqs "\mode", "nvhe"
+       __init_el2_nvhe_idregs
+       __init_el2_nvhe_cptr
+       __init_el2_nvhe_sve
+       __init_el2_nvhe_prepare_eret
+.endif
+.endm
+
+#endif /* __ARM_KVM_INIT_H__ */
index 85a3e49..29f97eb 100644 (file)
 #define ESR_ELx_FSC_TYPE       (0x3C)
 #define ESR_ELx_FSC_LEVEL      (0x03)
 #define ESR_ELx_FSC_EXTABT     (0x10)
+#define ESR_ELx_FSC_MTE                (0x11)
 #define ESR_ELx_FSC_SERROR     (0x11)
 #define ESR_ELx_FSC_ACCESS     (0x08)
 #define ESR_ELx_FSC_FAULT      (0x04)
index 7853739..6546158 100644 (file)
@@ -31,6 +31,10 @@ static inline u32 disr_to_esr(u64 disr)
        return esr;
 }
 
+asmlinkage void el1_sync_handler(struct pt_regs *regs);
+asmlinkage void el0_sync_handler(struct pt_regs *regs);
+asmlinkage void el0_sync_compat_handler(struct pt_regs *regs);
+
 asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void enter_from_user_mode(void);
index fd172c4..5ea8656 100644 (file)
@@ -201,6 +201,4 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
 extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
 
-extern int devmem_is_allowed(unsigned long pfn);
-
 #endif /* __ASM_IO_H */
index b0dc4ab..0aaf904 100644 (file)
@@ -12,7 +12,9 @@
 #define arch_kasan_reset_tag(addr)     __tag_reset(addr)
 #define arch_kasan_get_tag(addr)       __tag_get(addr)
 
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+
+void kasan_init(void);
 
 /*
  * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
@@ -33,7 +35,6 @@
 #define _KASAN_SHADOW_START(va)        (KASAN_SHADOW_END - (1UL << ((va) - KASAN_SHADOW_SCALE_SHIFT)))
 #define KASAN_SHADOW_START      _KASAN_SHADOW_START(vabits_actual)
 
-void kasan_init(void);
 void kasan_copy_shadow(pgd_t *pgdir);
 asmlinkage void kasan_early_init(void);
 
index 64ce293..4e90c2d 100644 (file)
@@ -80,6 +80,7 @@
                         HCR_FMO | HCR_IMO | HCR_PTW )
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
 #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
+#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
 /* TCR_EL2 Registers bits */
index 54387cc..8a33d83 100644 (file)
@@ -34,8 +34,6 @@
  */
 #define KVM_VECTOR_PREAMBLE    (2 * AARCH64_INSN_SIZE)
 
-#define __SMCCC_WORKAROUND_1_SMC_SZ 36
-
 #define KVM_HOST_SMCCC_ID(id)                                          \
        ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,                         \
                           ARM_SMCCC_SMC_64,                            \
@@ -150,6 +148,14 @@ extern void *__vhe_undefined_symbol;
 
 #endif
 
+struct kvm_nvhe_init_params {
+       unsigned long mair_el2;
+       unsigned long tcr_el2;
+       unsigned long tpidr_el2;
+       unsigned long stack_hyp_va;
+       phys_addr_t pgd_pa;
+};
+
 /* Translate a kernel address @ptr into its equivalent linear mapping */
 #define kvm_ksym_ref(ptr)                                              \
        ({                                                              \
@@ -165,17 +171,14 @@ struct kvm_vcpu;
 struct kvm_s2_mmu;
 
 DECLARE_KVM_NVHE_SYM(__kvm_hyp_init);
-DECLARE_KVM_NVHE_SYM(__kvm_hyp_host_vector);
 DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
 #define __kvm_hyp_init         CHOOSE_NVHE_SYM(__kvm_hyp_init)
-#define __kvm_hyp_host_vector  CHOOSE_NVHE_SYM(__kvm_hyp_host_vector)
 #define __kvm_hyp_vector       CHOOSE_HYP_SYM(__kvm_hyp_vector)
 
 extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
 DECLARE_KVM_NVHE_SYM(__per_cpu_start);
 DECLARE_KVM_NVHE_SYM(__per_cpu_end);
 
-extern atomic_t arm64_el2_vector_last_slot;
 DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
 #define __bp_harden_hyp_vecs   CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
 
@@ -189,8 +192,6 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
-extern void __kvm_enable_ssbs(void);
-
 extern u64 __vgic_v3_get_ich_vtr_el2(void);
 extern u64 __vgic_v3_read_vmcr(void);
 extern void __vgic_v3_write_vmcr(u32 vmcr);
@@ -198,7 +199,11 @@ extern void __vgic_v3_init_lrs(void);
 
 extern u32 __kvm_get_mdcr_el2(void);
 
-extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ];
+#if defined(GCC_VERSION) && GCC_VERSION < 50000
+#define SYM_CONSTRAINT "i"
+#else
+#define SYM_CONSTRAINT "S"
+#endif
 
 /*
  * Obtain the PC-relative address of a kernel symbol
@@ -216,7 +221,7 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ];
                typeof(s) *addr;                                        \
                asm("adrp       %0, %1\n"                               \
                    "add        %0, %0, :lo12:%1\n"                     \
-                   : "=r" (addr) : "S" (&s));                          \
+                   : "=r" (addr) : SYM_CONSTRAINT (&s));               \
                addr;                                                   \
        })
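
The "S" constraint asks the compiler for a direct symbol reference that the
adrp/add pair can consume, which GCC only accepts from version 5 onwards;
older compilers fall back to "i". Assuming the surrounding macro is
hyp_symbol_addr() (its name sits outside this hunk), a usage sketch:

        /* Resolve a symbol PC-relatively, so the result is valid at EL2
         * regardless of the kernel's runtime VA layout. */
        unsigned long *bases =
                (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base);
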
 
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
deleted file mode 100644 (file)
index d6bb401..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * Derived from arch/arm/include/asm/kvm_coproc.h
- * Copyright (C) 2012 Rusty Russell IBM Corporation
- */
-
-#ifndef __ARM64_KVM_COPROC_H__
-#define __ARM64_KVM_COPROC_H__
-
-#include <linux/kvm_host.h>
-
-void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
-
-struct kvm_sys_reg_table {
-       const struct sys_reg_desc *table;
-       size_t num;
-};
-
-int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
-int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
-int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
-int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
-
-#define kvm_coproc_table_init kvm_sys_reg_table_init
-void kvm_sys_reg_table_init(void);
-
-struct kvm_one_reg;
-int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
-int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
-unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
-
-#endif /* __ARM64_KVM_COPROC_H__ */
index 00bc6f1..f612c09 100644 (file)
 #include <asm/cputype.h>
 #include <asm/virt.h>
 
-unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
-unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu);
-void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v);
+#define CURRENT_EL_SP_EL0_VECTOR       0x0
+#define CURRENT_EL_SP_ELx_VECTOR       0x200
+#define LOWER_EL_AArch64_VECTOR                0x400
+#define LOWER_EL_AArch32_VECTOR                0x600
+
+enum exception_type {
+       except_type_sync        = 0,
+       except_type_irq         = 0x80,
+       except_type_fiq         = 0x100,
+       except_type_serror      = 0x180,
+};
 
 bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
-void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu);
 
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_vabt(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_undef32(struct kvm_vcpu *vcpu);
-void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr);
 
 static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
 {
@@ -168,30 +173,6 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
                vcpu_gp_regs(vcpu)->regs[reg_num] = val;
 }
 
-static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
-{
-       if (vcpu_mode_is_32bit(vcpu))
-               return vcpu_read_spsr32(vcpu);
-
-       if (vcpu->arch.sysregs_loaded_on_cpu)
-               return read_sysreg_el1(SYS_SPSR);
-       else
-               return __vcpu_sys_reg(vcpu, SPSR_EL1);
-}
-
-static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
-{
-       if (vcpu_mode_is_32bit(vcpu)) {
-               vcpu_write_spsr32(vcpu, v);
-               return;
-       }
-
-       if (vcpu->arch.sysregs_loaded_on_cpu)
-               write_sysreg_el1(v, SYS_SPSR);
-       else
-               __vcpu_sys_reg(vcpu, SPSR_EL1) = v;
-}
-
 /*
  * The layout of SPSR for an AArch32 state is different when observed from an
  * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
@@ -477,32 +458,9 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
        return data;            /* Leave LE untouched */
 }
 
-static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
-{
-       if (vcpu_mode_is_32bit(vcpu)) {
-               kvm_skip_instr32(vcpu, is_wide_instr);
-       } else {
-               *vcpu_pc(vcpu) += 4;
-               *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK;
-       }
-
-       /* advance the singlestep state machine */
-       *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
-}
-
-/*
- * Skip an instruction which has been emulated at hyp while most guest sysregs
- * are live.
- */
-static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu)
+static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
 {
-       *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
-       vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR);
-
-       kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-
-       write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR);
-       write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+       vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
 }
 
 #endif /* __ARM64_KVM_EMULATE_H__ */
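
The vector-offset constants and exception_type enum above are designed to be
summed: the base picks the SP/EL/arch quadrant of a vector table (quadrants
are 0x200 apart) and the type picks the 0x80-aligned entry within it. A
sketch of the combination (hypothetical helper, covering the current-EL
SP_ELx and lower-EL cases):

        static unsigned long except_vector_offset(bool lower_el, bool aarch32,
                                                  enum exception_type type)
        {
                unsigned long base;

                if (!lower_el)
                        base = CURRENT_EL_SP_ELx_VECTOR;
                else
                        base = aarch32 ? LOWER_EL_AArch32_VECTOR
                                       : LOWER_EL_AArch64_VECTOR;

                return base + type;
        }
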
index 0cd9f0f..11beda8 100644 (file)
 #define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
                                     KVM_DIRTY_LOG_INITIALLY_SET)
 
+/*
+ * Mode of operation configurable with kvm-arm.mode early param.
+ * See Documentation/admin-guide/kernel-parameters.txt for more information.
+ */
+enum kvm_mode {
+       KVM_MODE_DEFAULT,
+       KVM_MODE_PROTECTED,
+};
+enum kvm_mode kvm_get_mode(void);
+
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
 extern unsigned int kvm_sve_max_vl;
@@ -58,8 +68,6 @@ int kvm_arm_init_sve(void);
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
-int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
-void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
 struct kvm_vmid {
        /* The VMID generation used for the virt. memory system */
@@ -89,6 +97,9 @@ struct kvm_s2_mmu {
        struct kvm *kvm;
 };
 
+struct kvm_arch_memory_slot {
+};
+
 struct kvm_arch {
        struct kvm_s2_mmu mmu;
 
@@ -120,6 +131,7 @@ struct kvm_arch {
        unsigned int pmuver;
 
        u8 pfr0_csv2;
+       u8 pfr0_csv3;
 };
 
 struct kvm_vcpu_fault_info {
@@ -203,48 +215,6 @@ enum vcpu_sysreg {
        NR_SYS_REGS     /* Nothing after this line! */
 };
 
-/* 32bit mapping */
-#define c0_MPIDR       (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
-#define c0_CSSELR      (CSSELR_EL1 * 2)/* Cache Size Selection Register */
-#define c1_SCTLR       (SCTLR_EL1 * 2) /* System Control Register */
-#define c1_ACTLR       (ACTLR_EL1 * 2) /* Auxiliary Control Register */
-#define c1_CPACR       (CPACR_EL1 * 2) /* Coprocessor Access Control */
-#define c2_TTBR0       (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */
-#define c2_TTBR0_high  (c2_TTBR0 + 1)  /* TTBR0 top 32 bits */
-#define c2_TTBR1       (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */
-#define c2_TTBR1_high  (c2_TTBR1 + 1)  /* TTBR1 top 32 bits */
-#define c2_TTBCR       (TCR_EL1 * 2)   /* Translation Table Base Control R. */
-#define c3_DACR                (DACR32_EL2 * 2)/* Domain Access Control Register */
-#define c5_DFSR                (ESR_EL1 * 2)   /* Data Fault Status Register */
-#define c5_IFSR                (IFSR32_EL2 * 2)/* Instruction Fault Status Register */
-#define c5_ADFSR       (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */
-#define c5_AIFSR       (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */
-#define c6_DFAR                (FAR_EL1 * 2)   /* Data Fault Address Register */
-#define c6_IFAR                (c6_DFAR + 1)   /* Instruction Fault Address Register */
-#define c7_PAR         (PAR_EL1 * 2)   /* Physical Address Register */
-#define c7_PAR_high    (c7_PAR + 1)    /* PAR top 32 bits */
-#define c10_PRRR       (MAIR_EL1 * 2)  /* Primary Region Remap Register */
-#define c10_NMRR       (c10_PRRR + 1)  /* Normal Memory Remap Register */
-#define c12_VBAR       (VBAR_EL1 * 2)  /* Vector Base Address Register */
-#define c13_CID                (CONTEXTIDR_EL1 * 2)    /* Context ID Register */
-#define c13_TID_URW    (TPIDR_EL0 * 2) /* Thread ID, User R/W */
-#define c13_TID_URO    (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
-#define c13_TID_PRIV   (TPIDR_EL1 * 2) /* Thread ID, Privileged */
-#define c10_AMAIR0     (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
-#define c10_AMAIR1     (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
-#define c14_CNTKCTL    (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
-
-#define cp14_DBGDSCRext        (MDSCR_EL1 * 2)
-#define cp14_DBGBCR0   (DBGBCR0_EL1 * 2)
-#define cp14_DBGBVR0   (DBGBVR0_EL1 * 2)
-#define cp14_DBGBXVR0  (cp14_DBGBVR0 + 1)
-#define cp14_DBGWCR0   (DBGWCR0_EL1 * 2)
-#define cp14_DBGWVR0   (DBGWVR0_EL1 * 2)
-#define cp14_DBGDCCINT (MDCCINT_EL1 * 2)
-#define cp14_DBGVCR    (DBGVCR32_EL2 * 2)
-
-#define NR_COPRO_REGS  (NR_SYS_REGS * 2)
-
 struct kvm_cpu_context {
        struct user_pt_regs regs;       /* sp = sp_el0 */
 
@@ -255,10 +225,7 @@ struct kvm_cpu_context {
 
        struct user_fpsimd_state fp_regs;
 
-       union {
-               u64 sys_regs[NR_SYS_REGS];
-               u32 copro[NR_COPRO_REGS];
-       };
+       u64 sys_regs[NR_SYS_REGS];
 
        struct kvm_vcpu *__hyp_running_vcpu;
 };
@@ -409,8 +376,33 @@ struct kvm_vcpu_arch {
 #define KVM_ARM64_GUEST_HAS_SVE                (1 << 5) /* SVE exposed to guest */
 #define KVM_ARM64_VCPU_SVE_FINALIZED   (1 << 6) /* SVE config completed */
 #define KVM_ARM64_GUEST_HAS_PTRAUTH    (1 << 7) /* PTRAUTH exposed to guest */
+#define KVM_ARM64_PENDING_EXCEPTION    (1 << 8) /* Exception pending */
+#define KVM_ARM64_EXCEPT_MASK          (7 << 9) /* Target EL/MODE */
 
-#define vcpu_has_sve(vcpu) (system_supports_sve() && \
+/*
+ * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
+ * take the following values:
+ *
+ * For AArch32 EL1:
+ */
+#define KVM_ARM64_EXCEPT_AA32_UND      (0 << 9)
+#define KVM_ARM64_EXCEPT_AA32_IABT     (1 << 9)
+#define KVM_ARM64_EXCEPT_AA32_DABT     (2 << 9)
+/* For AArch64: */
+#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9)
+#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ  (1 << 9)
+#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ  (2 << 9)
+#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9)
+#define KVM_ARM64_EXCEPT_AA64_EL1      (0 << 11)
+#define KVM_ARM64_EXCEPT_AA64_EL2      (1 << 11)
+
+/*
+ * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be
+ * set together with an exception...
+ */
+#define KVM_ARM64_INCREMENT_PC         (1 << 9) /* Increment PC */
+
+#define vcpu_has_sve(vcpu) (system_supports_sve() &&                   \
                            ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
 
 #ifdef CONFIG_ARM64_PTR_AUTH
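
Because KVM_ARM64_INCREMENT_PC aliases bit 9 of KVM_ARM64_EXCEPT_MASK, any
update must clear the whole mask before setting either a pending exception or
a PC increment. A sketch of queuing an AArch64 EL1 synchronous exception
(hypothetical helper):

        static void pend_el1_sync(struct kvm_vcpu *vcpu)
        {
                /* Clearing EXCEPT_MASK also drops any INCREMENT_PC
                 * request, since the two encodings overlap on purpose. */
                vcpu->arch.flags &= ~(KVM_ARM64_EXCEPT_MASK |
                                      KVM_ARM64_PENDING_EXCEPTION);
                vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
                                     KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
                                     KVM_ARM64_PENDING_EXCEPTION);
        }
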
@@ -440,14 +432,96 @@ struct kvm_vcpu_arch {
 u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
 void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
 
-/*
- * CP14 and CP15 live in the same array, as they are backed by the
- * same system registers.
- */
-#define CPx_BIAS               IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
+{
+       /*
+        * *** VHE ONLY ***
+        *
+        * System registers listed in the switch are not saved on every
+        * exit from the guest but are only saved on vcpu_put.
+        *
+        * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+        * should never be listed below, because the guest cannot modify its
+        * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
+        * thread when emulating cross-VCPU communication.
+        */
+       if (!has_vhe())
+               return false;
+
+       switch (reg) {
+       case CSSELR_EL1:        *val = read_sysreg_s(SYS_CSSELR_EL1);   break;
+       case SCTLR_EL1:         *val = read_sysreg_s(SYS_SCTLR_EL12);   break;
+       case CPACR_EL1:         *val = read_sysreg_s(SYS_CPACR_EL12);   break;
+       case TTBR0_EL1:         *val = read_sysreg_s(SYS_TTBR0_EL12);   break;
+       case TTBR1_EL1:         *val = read_sysreg_s(SYS_TTBR1_EL12);   break;
+       case TCR_EL1:           *val = read_sysreg_s(SYS_TCR_EL12);     break;
+       case ESR_EL1:           *val = read_sysreg_s(SYS_ESR_EL12);     break;
+       case AFSR0_EL1:         *val = read_sysreg_s(SYS_AFSR0_EL12);   break;
+       case AFSR1_EL1:         *val = read_sysreg_s(SYS_AFSR1_EL12);   break;
+       case FAR_EL1:           *val = read_sysreg_s(SYS_FAR_EL12);     break;
+       case MAIR_EL1:          *val = read_sysreg_s(SYS_MAIR_EL12);    break;
+       case VBAR_EL1:          *val = read_sysreg_s(SYS_VBAR_EL12);    break;
+       case CONTEXTIDR_EL1:    *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
+       case TPIDR_EL0:         *val = read_sysreg_s(SYS_TPIDR_EL0);    break;
+       case TPIDRRO_EL0:       *val = read_sysreg_s(SYS_TPIDRRO_EL0);  break;
+       case TPIDR_EL1:         *val = read_sysreg_s(SYS_TPIDR_EL1);    break;
+       case AMAIR_EL1:         *val = read_sysreg_s(SYS_AMAIR_EL12);   break;
+       case CNTKCTL_EL1:       *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
+       case ELR_EL1:           *val = read_sysreg_s(SYS_ELR_EL12);     break;
+       case PAR_EL1:           *val = read_sysreg_par();               break;
+       case DACR32_EL2:        *val = read_sysreg_s(SYS_DACR32_EL2);   break;
+       case IFSR32_EL2:        *val = read_sysreg_s(SYS_IFSR32_EL2);   break;
+       case DBGVCR32_EL2:      *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
+       default:                return false;
+       }
+
+       return true;
+}
 
-#define vcpu_cp14(v,r)         ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS])
-#define vcpu_cp15(v,r)         ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS])
+static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
+{
+       /*
+        * *** VHE ONLY ***
+        *
+        * System registers listed in the switch are not restored on every
+        * entry to the guest but are only restored on vcpu_load.
+        *
+        * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+        * should never be listed below, because the MPIDR should only be set
+        * once, before running the VCPU, and never changed later.
+        */
+       if (!has_vhe())
+               return false;
+
+       switch (reg) {
+       case CSSELR_EL1:        write_sysreg_s(val, SYS_CSSELR_EL1);    break;
+       case SCTLR_EL1:         write_sysreg_s(val, SYS_SCTLR_EL12);    break;
+       case CPACR_EL1:         write_sysreg_s(val, SYS_CPACR_EL12);    break;
+       case TTBR0_EL1:         write_sysreg_s(val, SYS_TTBR0_EL12);    break;
+       case TTBR1_EL1:         write_sysreg_s(val, SYS_TTBR1_EL12);    break;
+       case TCR_EL1:           write_sysreg_s(val, SYS_TCR_EL12);      break;
+       case ESR_EL1:           write_sysreg_s(val, SYS_ESR_EL12);      break;
+       case AFSR0_EL1:         write_sysreg_s(val, SYS_AFSR0_EL12);    break;
+       case AFSR1_EL1:         write_sysreg_s(val, SYS_AFSR1_EL12);    break;
+       case FAR_EL1:           write_sysreg_s(val, SYS_FAR_EL12);      break;
+       case MAIR_EL1:          write_sysreg_s(val, SYS_MAIR_EL12);     break;
+       case VBAR_EL1:          write_sysreg_s(val, SYS_VBAR_EL12);     break;
+       case CONTEXTIDR_EL1:    write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
+       case TPIDR_EL0:         write_sysreg_s(val, SYS_TPIDR_EL0);     break;
+       case TPIDRRO_EL0:       write_sysreg_s(val, SYS_TPIDRRO_EL0);   break;
+       case TPIDR_EL1:         write_sysreg_s(val, SYS_TPIDR_EL1);     break;
+       case AMAIR_EL1:         write_sysreg_s(val, SYS_AMAIR_EL12);    break;
+       case CNTKCTL_EL1:       write_sysreg_s(val, SYS_CNTKCTL_EL12);  break;
+       case ELR_EL1:           write_sysreg_s(val, SYS_ELR_EL12);      break;
+       case PAR_EL1:           write_sysreg_s(val, SYS_PAR_EL1);       break;
+       case DACR32_EL2:        write_sysreg_s(val, SYS_DACR32_EL2);    break;
+       case IFSR32_EL2:        write_sysreg_s(val, SYS_IFSR32_EL2);    break;
+       case DBGVCR32_EL2:      write_sysreg_s(val, SYS_DBGVCR32_EL2);  break;
+       default:                return false;
+       }
+
+       return true;
+}
 
 struct kvm_vm_stat {
        ulong remote_tlb_flush;
@@ -473,6 +547,12 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+
 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
                              struct kvm_vcpu_events *events);
 
@@ -535,6 +615,17 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
 void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
 
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
+
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
+
+void kvm_sys_reg_table_init(void);
+
 /* MMIO helpers */
 void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
 unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
@@ -654,4 +745,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 #define kvm_arm_vcpu_sve_finalized(vcpu) \
        ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
 
+#define kvm_vcpu_has_pmu(vcpu)                                 \
+       (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
+
 #endif /* __ARM64_KVM_HOST_H__ */
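
The two accessors above only cover the VHE case where the guest's EL1 state
is live in hardware; callers still need the saved context as a fallback. A
read-path sketch in the spirit of vcpu_read_sys_reg():

        static u64 read_guest_sysreg(const struct kvm_vcpu *vcpu, int reg)
        {
                u64 val;

                /* Prefer the live hardware copy when loaded (VHE)... */
                if (vcpu->arch.sysregs_loaded_on_cpu &&
                    __vcpu_read_sys_reg_from_cpu(reg, &val))
                        return val;

                /* ...else fall back to the in-memory context. */
                return __vcpu_sys_reg(vcpu, reg);
        }
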
index 6b664de..c045082 100644 (file)
@@ -14,6 +14,7 @@
 
 DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
 DECLARE_PER_CPU(unsigned long, kvm_hyp_vector);
+DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 #define read_sysreg_elx(r,nvh,vh)                                      \
        ({                                                              \
@@ -92,10 +93,11 @@ void deactivate_traps_vhe_put(void);
 
 u64 __guest_enter(struct kvm_vcpu *vcpu);
 
+bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
+
 void __noreturn hyp_panic(void);
 #ifdef __KVM_NVHE_HYPERVISOR__
 void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
 #endif
 
 #endif /* __ARM64_KVM_HYP_H__ */
-
index 3313943..e52d82a 100644 (file)
@@ -72,6 +72,52 @@ alternative_cb kvm_update_va_mask
 alternative_cb_end
 .endm
 
+/*
+ * Convert a kernel image address to a PA
+ * reg: kernel address to be converted in place
+ * tmp: temporary register
+ *
+ * The actual code generation takes place in kvm_get_kimage_voffset, and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_get_kimage_voffset uses the
+ * specific registers encoded in the instructions).
+ */
+.macro kimg_pa reg, tmp
+alternative_cb kvm_get_kimage_voffset
+       movz    \tmp, #0
+       movk    \tmp, #0, lsl #16
+       movk    \tmp, #0, lsl #32
+       movk    \tmp, #0, lsl #48
+alternative_cb_end
+
+       /* reg = __pa(reg) */
+       sub     \reg, \reg, \tmp
+.endm
+
+/*
+ * Convert a kernel image address to a hyp VA
+ * reg: kernel address to be converted in place
+ * tmp: temporary register
+ *
+ * The actual code generation takes place in kvm_update_kimg_phys_offset, and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_kimg_phys_offset uses the
+ * specific registers encoded in the instructions).
+ */
+.macro kimg_hyp_va reg, tmp
+alternative_cb kvm_update_kimg_phys_offset
+       movz    \tmp, #0
+       movk    \tmp, #0, lsl #16
+       movk    \tmp, #0, lsl #32
+       movk    \tmp, #0, lsl #48
+alternative_cb_end
+
+       sub     \reg, \reg, \tmp
+       mov_q   \tmp, PAGE_OFFSET
+       orr     \reg, \reg, \tmp
+       kern_hyp_va \reg
+.endm
+
 #else
 
 #include <linux/pgtable.h>
@@ -98,6 +144,24 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
 
 #define kern_hyp_va(v)         ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
 
+static __always_inline unsigned long __kimg_hyp_va(unsigned long v)
+{
+       unsigned long offset;
+
+       asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
+                                   "movk %0, #0, lsl #16\n"
+                                   "movk %0, #0, lsl #32\n"
+                                   "movk %0, #0, lsl #48\n",
+                                   kvm_update_kimg_phys_offset)
+                    : "=r" (offset));
+
+       return __kern_hyp_va((v - offset) | PAGE_OFFSET);
+}
+
+#define kimg_fn_hyp_va(v)      ((typeof(*v))(__kimg_hyp_va((unsigned long)(v))))
+
+#define kimg_fn_ptr(x) (typeof(x) **)(x)
+
 /*
  * We currently support using a VM-specified IPA size. For backward
  * compatibility, the default IPA size is fixed to 40bits.
@@ -208,52 +272,6 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
        return ret;
 }
 
-/*
- * EL2 vectors can be mapped and rerouted in a number of ways,
- * depending on the kernel configuration and CPU present:
- *
- * - If the CPU is affected by Spectre-v2, the hardening sequence is
- *   placed in one of the vector slots, which is executed before jumping
- *   to the real vectors.
- *
- * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot
- *   containing the hardening sequence is mapped next to the idmap page,
- *   and executed before jumping to the real vectors.
- *
- * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an
- *   empty slot is selected, mapped next to the idmap page, and
- *   executed before jumping to the real vectors.
- *
- * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with
- * VHE, as we don't have hypervisor-specific mappings. If the system
- * is VHE and yet selects this capability, it will be ignored.
- */
-extern void *__kvm_bp_vect_base;
-extern int __kvm_harden_el2_vector_slot;
-
-static inline void *kvm_get_hyp_vector(void)
-{
-       struct bp_hardening_data *data = arm64_get_bp_hardening_data();
-       void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
-       int slot = -1;
-
-       if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) {
-               vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
-               slot = data->hyp_vectors_slot;
-       }
-
-       if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) {
-               vect = __kvm_bp_vect_base;
-               if (slot == -1)
-                       slot = __kvm_harden_el2_vector_slot;
-       }
-
-       if (slot != -1)
-               vect += slot * SZ_2K;
-
-       return vect;
-}
-
 #define kvm_phys_to_vttbr(addr)                phys_to_ttbr(addr)
 
 static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
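
The C fallback __kimg_hyp_va() above makes the translation chain explicit:
subtract the patched kimage physical offset to get a PA, fold that into the
linear map via PAGE_OFFSET, then apply the hyp VA transformation. A conceptual
equivalent with the offset passed in (the real code reads it from a
runtime-patched immediate):

        static unsigned long kimg_to_hyp_va(unsigned long v,
                                            unsigned long offset)
        {
                /* image VA -> PA -> linear-map VA -> hyp VA */
                return __kern_hyp_va((v - offset) | PAGE_OFFSET);
        }
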
index 556cb2d..18fce22 100644 (file)
@@ -72,7 +72,7 @@
  * address space for the shadow region respectively. They can bloat the stack
  * significantly, so double the (minimum) stack size when they are in use.
  */
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 #define KASAN_SHADOW_OFFSET    _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
 #define KASAN_SHADOW_END       ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \
                                        + KASAN_SHADOW_OFFSET)
@@ -214,7 +214,7 @@ static inline unsigned long kaslr_offset(void)
        (__force __typeof__(addr))__addr;                               \
 })
 
-#ifdef CONFIG_KASAN_SW_TAGS
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
 #define __tag_shifted(tag)     ((u64)(tag) << 56)
 #define __tag_reset(addr)      __untagged_addr(addr)
 #define __tag_get(addr)                (__u8)((u64)(addr) >> 56)
@@ -222,7 +222,7 @@ static inline unsigned long kaslr_offset(void)
 #define __tag_shifted(tag)     0UL
 #define __tag_reset(addr)      (addr)
 #define __tag_get(addr)                0
-#endif /* CONFIG_KASAN_SW_TAGS */
+#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
 
 static inline const void *__tag_set(const void *addr, u8 tag)
 {
@@ -230,6 +230,15 @@ static inline const void *__tag_set(const void *addr, u8 tag)
        return (const void *)(__addr | __tag_shifted(tag));
 }
 
+#ifdef CONFIG_KASAN_HW_TAGS
+#define arch_enable_tagging()                  mte_enable_kernel()
+#define arch_init_tags(max_tag)                        mte_init_tags(max_tag)
+#define arch_get_random_tag()                  mte_get_random_tag()
+#define arch_get_mem_tag(addr)                 mte_get_mem_tag(addr)
+#define arch_set_mem_tag_range(addr, size, tag)        \
+                       mte_set_mem_tag_range((addr), (size), (tag))
+#endif /* CONFIG_KASAN_HW_TAGS */
+
 /*
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
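
With the tag macros now enabled for KASAN_HW_TAGS as well, a tag round-trip
through a pointer behaves identically for software and hardware tagging. A
minimal sketch ('ptr' is hypothetical):

        /* Stamp tag 0x3 into the top byte and read it back. With
         * neither SW_TAGS nor HW_TAGS, __tag_shifted() is 0 and this
         * degrades to a no-op. */
        const void *tagged = __tag_set(ptr, 0x3);
        u8 tag = __tag_get(tagged);     /* 0x3 when tagging is enabled */
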
index b2e91c1..75beffe 100644 (file)
@@ -12,9 +12,6 @@
 #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT)
 #define TTBR_ASID_MASK (UL(0xffff) << 48)
 
-#define BP_HARDEN_EL2_SLOTS 4
-#define __BP_HARDEN_HYP_VECS_SZ (BP_HARDEN_EL2_SLOTS * SZ_2K)
-
 #ifndef __ASSEMBLY__
 
 #include <linux/refcount.h>
@@ -41,32 +38,6 @@ static inline bool arm64_kernel_unmapped_at_el0(void)
        return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
 }
 
-typedef void (*bp_hardening_cb_t)(void);
-
-struct bp_hardening_data {
-       int                     hyp_vectors_slot;
-       bp_hardening_cb_t       fn;
-};
-
-DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-
-static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
-{
-       return this_cpu_ptr(&bp_hardening_data);
-}
-
-static inline void arm64_apply_bp_hardening(void)
-{
-       struct bp_hardening_data *d;
-
-       if (!cpus_have_const_cap(ARM64_SPECTRE_V2))
-               return;
-
-       d = arm64_get_bp_hardening_data();
-       if (d->fn)
-               d->fn();
-}
-
 extern void arm64_memblock_init(void);
 extern void paging_init(void);
 extern void bootmem_init(void);
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
new file mode 100644 (file)
index 0000000..2d73a16
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_MTE_DEF_H
+#define __ASM_MTE_DEF_H
+
+#define MTE_GRANULE_SIZE       UL(16)
+#define MTE_GRANULE_MASK       (~(MTE_GRANULE_SIZE - 1))
+#define MTE_TAG_SHIFT          56
+#define MTE_TAG_SIZE           4
+#define MTE_TAG_MASK           GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
+
+#endif /* __ASM_MTE_DEF_H  */
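
These constants describe MTE's 16-byte tag granules and the 4-bit logical tag
held in bits 59:56 of a pointer. A small sketch of how they compose
(hypothetical helpers):

        static inline unsigned long mte_granule_base(unsigned long addr)
        {
                return addr & MTE_GRANULE_MASK; /* round down to granule */
        }

        static inline u8 mte_ptr_tag_bits(const void *ptr)
        {
                return ((unsigned long)ptr & MTE_TAG_MASK) >> MTE_TAG_SHIFT;
        }
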
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
new file mode 100644 (file)
index 0000000..26349a4
--- /dev/null
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_MTE_KASAN_H
+#define __ASM_MTE_KASAN_H
+
+#include <asm/mte-def.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+/*
+ * The functions below are meant to be used only for the
+ * KASAN_HW_TAGS interface defined in asm/memory.h.
+ */
+#ifdef CONFIG_ARM64_MTE
+
+static inline u8 mte_get_ptr_tag(void *ptr)
+{
+       /* Note: The format of KASAN tags is 0xF<x> */
+       u8 tag = 0xF0 | (u8)(((u64)(ptr)) >> MTE_TAG_SHIFT);
+
+       return tag;
+}
+
+u8 mte_get_mem_tag(void *addr);
+u8 mte_get_random_tag(void);
+void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag);
+
+void mte_enable_kernel(void);
+void mte_init_tags(u64 max_tag);
+
+#else /* CONFIG_ARM64_MTE */
+
+static inline u8 mte_get_ptr_tag(void *ptr)
+{
+       return 0xFF;
+}
+
+static inline u8 mte_get_mem_tag(void *addr)
+{
+       return 0xFF;
+}
+static inline u8 mte_get_random_tag(void)
+{
+       return 0xFF;
+}
+static inline void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
+{
+       return addr;
+}
+
+static inline void mte_enable_kernel(void)
+{
+}
+
+static inline void mte_init_tags(u64 max_tag)
+{
+}
+
+#endif /* CONFIG_ARM64_MTE */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_MTE_KASAN_H  */
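
Note that the !CONFIG_ARM64_MTE stubs all return 0xFF, the match-all tag, so
comparisons degrade gracefully. A sketch of the check KASAN_HW_TAGS
effectively performs on access (hypothetical helper):

        /* Does the pointer's tag match the tag stored in memory?
         * Without MTE both sides come from the 0xFF stubs and the
         * check is always true. */
        static inline bool mte_ptr_matches_mem(void *ptr)
        {
                return mte_get_ptr_tag(ptr) == mte_get_mem_tag(ptr);
        }
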
index 1c99fca..d02aff9 100644 (file)
@@ -5,17 +5,21 @@
 #ifndef __ASM_MTE_H
 #define __ASM_MTE_H
 
-#define MTE_GRANULE_SIZE       UL(16)
-#define MTE_GRANULE_MASK       (~(MTE_GRANULE_SIZE - 1))
-#define MTE_TAG_SHIFT          56
-#define MTE_TAG_SIZE           4
+#include <asm/compiler.h>
+#include <asm/mte-def.h>
+
+#define __MTE_PREAMBLE         ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
 
 #ifndef __ASSEMBLY__
 
+#include <linux/bitfield.h>
 #include <linux/page-flags.h>
+#include <linux/types.h>
 
 #include <asm/pgtable-types.h>
 
+extern u64 gcr_kernel_excl;
+
 void mte_clear_page_tags(void *addr);
 unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
                                      unsigned long n);
@@ -45,7 +49,9 @@ long get_mte_ctrl(struct task_struct *task);
 int mte_ptrace_copy_tags(struct task_struct *child, long request,
                         unsigned long addr, unsigned long data);
 
-#else
+void mte_assign_mem_tag_range(void *addr, size_t size);
+
+#else /* CONFIG_ARM64_MTE */
 
 /* unused if !CONFIG_ARM64_MTE, silence the compiler */
 #define PG_mte_tagged  0
@@ -80,7 +86,11 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child,
        return -EIO;
 }
 
-#endif
+static inline void mte_assign_mem_tag_range(void *addr, size_t size)
+{
+}
+
+#endif /* CONFIG_ARM64_MTE */
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_MTE_H  */
index 1599e17..8f16616 100644 (file)
@@ -239,6 +239,12 @@ PERCPU_RET_OP(add, add, ldadd)
 #define this_cpu_cmpxchg_8(pcp, o, n)  \
        _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
 
+#ifdef __KVM_NVHE_HYPERVISOR__
+extern unsigned long __hyp_per_cpu_offset(unsigned int cpu);
+#define __per_cpu_offset
+#define per_cpu_offset(cpu)    __hyp_per_cpu_offset((cpu))
+#endif
+
 #include <asm-generic/percpu.h>
 
 /* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */
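
Defining __per_cpu_offset empty suppresses the generic definition, and
per_cpu_offset() is rerouted so nVHE hyp code can reuse the stock per-cpu
machinery against its own copies of the per-cpu sections. A simplified sketch
of what __hyp_per_cpu_offset() must compute (the real body lives in the nVHE
hyp object; bounds checking omitted):

        unsigned long __hyp_per_cpu_offset(unsigned int cpu)
        {
                /* Delta from the ELF per-cpu section start to this
                 * CPU's hyp per-cpu copy, so this_cpu_ptr() resolves
                 * correctly at EL2. */
                unsigned long base = kern_hyp_va(kvm_arm_hyp_percpu_base[cpu]);

                return base - (unsigned long)&__per_cpu_start;
        }
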
index 724249f..ca2cd75 100644 (file)
@@ -152,7 +152,7 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_ARM64_MTE
        u64                     sctlr_tcf0;
-       u64                     gcr_user_incl;
+       u64                     gcr_user_excl;
 #endif
 };
 
index 3994169..8ff5793 100644 (file)
@@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[];
 extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
 extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 extern char __hyp_text_start[], __hyp_text_end[];
+extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
 extern char __idmap_text_start[], __idmap_text_end[];
 extern char __initdata_begin[], __initdata_end[];
 extern char __inittext_begin[], __inittext_end[];
index 2e7f529..bcb01ca 100644 (file)
@@ -46,9 +46,9 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
  * Logical CPU mapping.
  */
 extern u64 __cpu_logical_map[NR_CPUS];
-extern u64 cpu_logical_map(int cpu);
+extern u64 cpu_logical_map(unsigned int cpu);
 
-static inline void set_cpu_logical_map(int cpu, u64 hwid)
+static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid)
 {
        __cpu_logical_map[cpu] = hwid;
 }
index fcdfbce..f62ca39 100644 (file)
@@ -9,7 +9,15 @@
 #ifndef __ASM_SPECTRE_H
 #define __ASM_SPECTRE_H
 
+#define BP_HARDEN_EL2_SLOTS 4
+#define __BP_HARDEN_HYP_VECS_SZ        ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/percpu.h>
+
 #include <asm/cpufeature.h>
+#include <asm/virt.h>
 
 /* Watch out, ordering is important here. */
 enum mitigation_state {
@@ -20,13 +28,70 @@ enum mitigation_state {
 
 struct task_struct;
 
+/*
+ * Note: the order of this enum corresponds to __bp_harden_hyp_vecs and
+ * we rely on having the direct vectors first.
+ */
+enum arm64_hyp_spectre_vector {
+       /*
+        * Take exceptions directly to __kvm_hyp_vector. This must be
+        * 0 so that it is used by default when mitigations are not needed.
+        */
+       HYP_VECTOR_DIRECT,
+
+       /*
+        * Bounce via a slot in the hypervisor text mapping of
+        * __bp_harden_hyp_vecs, which contains an SMC call.
+        */
+       HYP_VECTOR_SPECTRE_DIRECT,
+
+       /*
+        * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs
+        * next to the idmap page.
+        */
+       HYP_VECTOR_INDIRECT,
+
+       /*
+        * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs
+        * next to the idmap page, which contains an SMC call.
+        */
+       HYP_VECTOR_SPECTRE_INDIRECT,
+};
+
+typedef void (*bp_hardening_cb_t)(void);
+
+struct bp_hardening_data {
+       enum arm64_hyp_spectre_vector   slot;
+       bp_hardening_cb_t               fn;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+static inline void arm64_apply_bp_hardening(void)
+{
+       struct bp_hardening_data *d;
+
+       if (!cpus_have_const_cap(ARM64_SPECTRE_V2))
+               return;
+
+       d = this_cpu_ptr(&bp_hardening_data);
+       if (d->fn)
+               d->fn();
+}
+
 enum mitigation_state arm64_get_spectre_v2_state(void);
 bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope);
 void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
 
+bool has_spectre_v3a(const struct arm64_cpu_capabilities *cap, int scope);
+void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+
 enum mitigation_state arm64_get_spectre_v4_state(void);
 bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope);
 void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
 void spectre_v4_enable_task_mitigation(struct task_struct *tsk);
 
+enum mitigation_state arm64_get_meltdown_state(void);
+
+#endif /* __ASSEMBLY__ */
 #endif /* __ASM_SPECTRE_H */
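
The ordering comment above the enum matters: HYP_VECTOR_DIRECT must be zero so
that a zero-initialized per-CPU slot selects the unmitigated vectors by
default. KVM can then resolve the vector base with a simple table lookup; a
sketch, assuming a selector table populated at init time (the table name is
hypothetical):

        static const void *pick_hyp_vector(void)
        {
                struct bp_hardening_data *data =
                        this_cpu_ptr(&bp_hardening_data);

                /* One precomputed base per arm64_hyp_spectre_vector. */
                return hyp_spectre_vector_selector[data->slot];
        }
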
index b31e8e8..3a3264f 100644 (file)
@@ -5,7 +5,7 @@
 #ifndef __ASM_STRING_H
 #define __ASM_STRING_H
 
-#ifndef CONFIG_KASAN
+#if !(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
 #define __HAVE_ARCH_STRRCHR
 extern char *strrchr(const char *, int c);
 
@@ -48,7 +48,8 @@ extern void *__memset(void *, int, __kernel_size_t);
 void memcpy_flushcache(void *dst, const void *src, size_t cnt);
 #endif
 
-#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
+       !defined(__SANITIZE_ADDRESS__)
 
 /*
  * For files that are not instrumented (e.g. mm/slub.c) we
index cf7922f..8b5e7e5 100644 (file)
 
 #define SYS_PMCCFILTR_EL0              sys_reg(3, 3, 14, 15, 7)
 
+#define SYS_SCTLR_EL2                  sys_reg(3, 4, 1, 0, 0)
 #define SYS_ZCR_EL2                    sys_reg(3, 4, 1, 2, 0)
 #define SYS_DACR32_EL2                 sys_reg(3, 4, 3, 0, 0)
 #define SYS_SPSR_EL2                   sys_reg(3, 4, 4, 0, 0)
index abb31aa..6f986e0 100644 (file)
@@ -159,8 +159,28 @@ static inline void __uaccess_enable_hw_pan(void)
                        CONFIG_ARM64_PAN));
 }
 
+/*
+ * The Tag Check Flag (TCF) mode for MTE is per EL: TCF0 affects EL0
+ * and TCF affects EL1, irrespective of which TTBR is used.
+ * The kernel usually accesses TTBR0 with LDTR/STTR instructions when
+ * UAO is available, so these act as EL0 accesses using TCF0.
+ * However, the futex.h code uses exclusives, which execute at EL1;
+ * this can cause a tag check fault even if the user disables TCF0.
+ *
+ * To address the problem we set the PSTATE.TCO bit in uaccess_enable()
+ * and reset it in uaccess_disable().
+ *
+ * The Tag Check Override (TCO) bit temporarily disables tag checking,
+ * preventing the issue.
+ */
 static inline void uaccess_disable_privileged(void)
 {
+       asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0),
+                                ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+
        if (uaccess_ttbr0_disable())
                return;
 
@@ -169,6 +189,9 @@ static inline void uaccess_disable_privileged(void)
 
 static inline void uaccess_enable_privileged(void)
 {
+       asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
+                                ARM64_MTE, CONFIG_KASAN_HW_TAGS));
+
        if (uaccess_ttbr0_enable())
                return;
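
The SET_PSTATE_TCO alternatives mean tag checks are suspended only across the
privileged-access window, and only on MTE hardware with KASAN_HW_TAGS enabled.
A usage sketch (do_futex_op is hypothetical):

        static int futex_atomic_sketch(u32 __user *uaddr, u32 val)
        {
                int ret;

                uaccess_enable_privileged();    /* TCO=1: tag faults masked */
                ret = do_futex_op(uaddr, val);  /* EL1 exclusive access */
                uaccess_disable_privileged();   /* TCO=0: checking resumes */

                return ret;
        }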
 
index b3b2019..86a9d7b 100644 (file)
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls                (__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END            (__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls           441
+#define __NR_compat_syscalls           442
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
index 107f08e..cccfbbe 100644 (file)
@@ -889,6 +889,8 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
 #define __NR_process_madvise 440
 __SYSCALL(__NR_process_madvise, sys_process_madvise)
+#define __NR_epoll_pwait2 441
+__SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
 
 /*
  * Please add new compat syscalls above this comment and update
index 6069be5..ee6a48d 100644 (file)
@@ -65,9 +65,19 @@ extern u32 __boot_cpu_mode[2];
 void __hyp_set_vectors(phys_addr_t phys_vector_base);
 void __hyp_reset_vectors(void);
 
+DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+
 /* Reports the availability of HYP mode */
 static inline bool is_hyp_mode_available(void)
 {
+       /*
+        * If KVM protected mode is initialized, all CPUs must have been booted
+        * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
+        */
+       if (IS_ENABLED(CONFIG_KVM) &&
+           static_branch_likely(&kvm_protected_mode_initialized))
+               return true;
+
        return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
                __boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
 }
@@ -75,6 +85,14 @@ static inline bool is_hyp_mode_available(void)
 /* Check if the bootloader has booted CPUs in different modes */
 static inline bool is_hyp_mode_mismatched(void)
 {
+       /*
+        * If KVM protected mode is initialized, all CPUs must have been booted
+        * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1.
+        */
+       if (IS_ENABLED(CONFIG_KVM) &&
+           static_branch_likely(&kvm_protected_mode_initialized))
+               return false;
+
        return __boot_cpu_mode[0] != __boot_cpu_mode[1];
 }
 
@@ -97,6 +115,14 @@ static __always_inline bool has_vhe(void)
                return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN);
 }
 
+static __always_inline bool is_protected_kvm_enabled(void)
+{
+       if (is_vhe_hyp_code())
+               return false;
+       else
+               return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* ! __ASM__VIRT_H */
index 1c17c3a..24223ad 100644 (file)
@@ -156,9 +156,6 @@ struct kvm_sync_regs {
        __u64 device_irq_level;
 };
 
-struct kvm_arch_memory_slot {
-};
-
 /*
  * PMU filter structure. Describe a range of events with a particular
  * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
index 679b19b..f42fd9e 100644 (file)
@@ -47,6 +47,9 @@ int main(void)
   DEFINE(THREAD_KEYS_USER,     offsetof(struct task_struct, thread.keys_user));
   DEFINE(THREAD_KEYS_KERNEL,   offsetof(struct task_struct, thread.keys_kernel));
 #endif
+#ifdef CONFIG_ARM64_MTE
+  DEFINE(THREAD_GCR_EL1_USER,  offsetof(struct task_struct, thread.gcr_user_excl));
+#endif
   BLANK();
   DEFINE(S_X0,                 offsetof(struct pt_regs, regs[0]));
   DEFINE(S_X2,                 offsetof(struct pt_regs, regs[2]));
@@ -109,6 +112,11 @@ int main(void)
   DEFINE(CPU_APGAKEYLO_EL1,    offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
   DEFINE(HOST_CONTEXT_VCPU,    offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
   DEFINE(HOST_DATA_CONTEXT,    offsetof(struct kvm_host_data, host_ctxt));
+  DEFINE(NVHE_INIT_MAIR_EL2,   offsetof(struct kvm_nvhe_init_params, mair_el2));
+  DEFINE(NVHE_INIT_TCR_EL2,    offsetof(struct kvm_nvhe_init_params, tcr_el2));
+  DEFINE(NVHE_INIT_TPIDR_EL2,  offsetof(struct kvm_nvhe_init_params, tpidr_el2));
+  DEFINE(NVHE_INIT_STACK_HYP_VA,       offsetof(struct kvm_nvhe_init_params, stack_hyp_va));
+  DEFINE(NVHE_INIT_PGD_PA,     offsetof(struct kvm_nvhe_init_params, pgd_pa));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_CTX_SP,           offsetof(struct cpu_suspend_ctx, sp));
index cafaf0d..a634283 100644 (file)
@@ -196,16 +196,6 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
        return is_midr_in_range(midr, &range) && has_dic;
 }
 
-#ifdef CONFIG_RANDOMIZE_BASE
-
-static const struct midr_range ca57_a72[] = {
-       MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
-       MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
-       {},
-};
-
-#endif
-
 #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
 static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
 #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
@@ -461,9 +451,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
        },
 #ifdef CONFIG_RANDOMIZE_BASE
        {
-               .desc = "EL2 vector hardening",
-               .capability = ARM64_HARDEN_EL2_VECTORS,
-               ERRATA_MIDR_RANGE_LIST(ca57_a72),
+       /* Must come after the Spectre-v2 entry */
+               .desc = "Spectre-v3a",
+               .capability = ARM64_SPECTRE_V3A,
+               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+               .matches = has_spectre_v3a,
+               .cpu_enable = spectre_v3a_enable_mitigation,
        },
 #endif
        {
index 39138f6..7ffb5f1 100644 (file)
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/cpu.h>
+#include <linux/kasan.h>
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/fpsimd.h>
+#include <asm/kvm_host.h>
 #include <asm/mmu_context.h>
 #include <asm/mte.h>
 #include <asm/processor.h>
@@ -1709,9 +1711,26 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
                cleared_zero_page = true;
                mte_clear_page_tags(lm_alias(empty_zero_page));
        }
+
+       kasan_init_hw_tags_cpu();
 }
 #endif /* CONFIG_ARM64_MTE */
 
+#ifdef CONFIG_KVM
+static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+       if (kvm_get_mode() != KVM_MODE_PROTECTED)
+               return false;
+
+       if (is_kernel_in_hyp_mode()) {
+               pr_warn("Protected KVM not available with VHE\n");
+               return false;
+       }
+
+       return true;
+}
+#endif /* CONFIG_KVM */
+
 /* Internal helper functions to match cpu capability type */
 static bool
 cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -1803,6 +1822,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .field_pos = ID_AA64PFR0_EL1_SHIFT,
                .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT,
        },
+       {
+               .desc = "Protected KVM",
+               .capability = ARM64_KVM_PROTECTED_MODE,
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .matches = is_kvm_protected_mode,
+       },
 #endif
        {
                .desc = "Kernel page table isolation (KPTI)",
@@ -2831,14 +2856,28 @@ static int __init enable_mrs_emulation(void)
 
 core_initcall(enable_mrs_emulation);
 
+enum mitigation_state arm64_get_meltdown_state(void)
+{
+       if (__meltdown_safe)
+               return SPECTRE_UNAFFECTED;
+
+       if (arm64_kernel_unmapped_at_el0())
+               return SPECTRE_MITIGATED;
+
+       return SPECTRE_VULNERABLE;
+}
+
 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr,
                          char *buf)
 {
-       if (__meltdown_safe)
+       switch (arm64_get_meltdown_state()) {
+       case SPECTRE_UNAFFECTED:
                return sprintf(buf, "Not affected\n");
 
-       if (arm64_kernel_unmapped_at_el0())
+       case SPECTRE_MITIGATED:
                return sprintf(buf, "Mitigation: PTI\n");
 
-       return sprintf(buf, "Vulnerable\n");
+       default:
+               return sprintf(buf, "Vulnerable\n");
+       }
 }
index 51c7621..2a93fa5 100644 (file)
@@ -173,6 +173,43 @@ alternative_else_nop_endif
 #endif
        .endm
 
+       .macro mte_set_gcr, tmp, tmp2
+#ifdef CONFIG_ARM64_MTE
+       /*
+        * Calculate and set the exclude mask preserving
+        * the RRND (bit[16]) setting.
+        */
+       mrs_s   \tmp2, SYS_GCR_EL1
+       bfi     \tmp2, \tmp, #0, #16
+       msr_s   SYS_GCR_EL1, \tmp2
+       isb
+#endif
+       .endm
+
+       .macro mte_set_kernel_gcr, tmp, tmp2
+#ifdef CONFIG_KASAN_HW_TAGS
+alternative_if_not ARM64_MTE
+       b       1f
+alternative_else_nop_endif
+       ldr_l   \tmp, gcr_kernel_excl
+
+       mte_set_gcr \tmp, \tmp2
+1:
+#endif
+       .endm
+
+       .macro mte_set_user_gcr, tsk, tmp, tmp2
+#ifdef CONFIG_ARM64_MTE
+alternative_if_not ARM64_MTE
+       b       1f
+alternative_else_nop_endif
+       ldr     \tmp, [\tsk, #THREAD_GCR_EL1_USER]
+
+       mte_set_gcr \tmp, \tmp2
+1:
+#endif
+       .endm
+
        .macro  kernel_entry, el, regsize = 64
        .if     \regsize == 32
        mov     w0, w0                          // zero upper 32 bits of x0
@@ -212,6 +249,8 @@ alternative_else_nop_endif
 
        ptrauth_keys_install_kernel tsk, x20, x22, x23
 
+       mte_set_kernel_gcr x22, x23
+
        scs_load tsk, x20
        .else
        add     x21, sp, #S_FRAME_SIZE
@@ -315,6 +354,8 @@ alternative_else_nop_endif
        /* No kernel C function calls after this as user keys are set. */
        ptrauth_keys_install_user tsk, x0, x1, x2
 
+       mte_set_user_gcr tsk, x0, x1
+
        apply_ssbd 0, x0, x1
        .endif
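
The bfi in mte_set_gcr rewrites only GCR_EL1's exclude mask (bits 15:0),
leaving the RRND setting at bit 16 untouched. A conceptual C equivalent of
the update ('excl' stands in for the incoming mask):

        static void set_gcr_excl(u64 excl)
        {
                u64 gcr = read_sysreg_s(SYS_GCR_EL1);

                gcr &= ~(u64)GENMASK(15, 0);    /* clear the old exclude mask */
                gcr |= excl & GENMASK(15, 0);   /* insert the new one; bit 16
                                                 * (RRND) and above preserved */
                write_sysreg_s(SYS_GCR_EL1, gcr);
                isb();
        }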
 
index f2eb206..a0dc987 100644 (file)
@@ -11,7 +11,6 @@
 
 #include <linux/linkage.h>
 #include <linux/init.h>
-#include <linux/irqchip/arm-gic-v3.h>
 #include <linux/pgtable.h>
 
 #include <asm/asm_pointer_auth.h>
@@ -21,6 +20,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/cputype.h>
+#include <asm/el2_setup.h>
 #include <asm/elf.h>
 #include <asm/image.h>
 #include <asm/kernel-pgtable.h>
@@ -433,7 +433,7 @@ SYM_FUNC_START_LOCAL(__primary_switched)
        bl      __pi_memset
        dsb     ishst                           // Make zero page visible to PTW
 
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
        bl      kasan_early_init
 #endif
 #ifdef CONFIG_RANDOMIZE_BASE
@@ -493,155 +493,56 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
        eret
 
 SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
-       mov_q   x0, INIT_SCTLR_EL2_MMU_OFF
-       msr     sctlr_el2, x0
-
 #ifdef CONFIG_ARM64_VHE
        /*
-        * Check for VHE being present. For the rest of the EL2 setup,
-        * x2 being non-zero indicates that we do have VHE, and that the
-        * kernel is intended to run at EL2.
+        * Check for VHE being present. x2 being non-zero indicates that we
+        * do have VHE, and that the kernel is intended to run at EL2.
         */
        mrs     x2, id_aa64mmfr1_el1
        ubfx    x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
 #else
        mov     x2, xzr
 #endif
+       cbz     x2, init_el2_nvhe
 
-       /* Hyp configuration. */
-       mov_q   x0, HCR_HOST_NVHE_FLAGS
-       cbz     x2, set_hcr
+       /*
+        * When VHE _is_ in use, EL1 will not be used in the host and
+        * requires no configuration, and all non-hyp-specific EL2 setup
+        * will be done via the _EL1 system register aliases in __cpu_setup.
+        */
        mov_q   x0, HCR_HOST_VHE_FLAGS
-set_hcr:
        msr     hcr_el2, x0
        isb
 
-       /*
-        * Allow Non-secure EL1 and EL0 to access physical timer and counter.
-        * This is not necessary for VHE, since the host kernel runs in EL2,
-        * and EL0 accesses are configured in the later stage of boot process.
-        * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
-        * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
-        * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
-        * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
-        * EL2.
-        */
-       cbnz    x2, 1f
-       mrs     x0, cnthctl_el2
-       orr     x0, x0, #3                      // Enable EL1 physical timers
-       msr     cnthctl_el2, x0
-1:
-       msr     cntvoff_el2, xzr                // Clear virtual offset
-
-#ifdef CONFIG_ARM_GIC_V3
-       /* GICv3 system register access */
-       mrs     x0, id_aa64pfr0_el1
-       ubfx    x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
-       cbz     x0, 3f
-
-       mrs_s   x0, SYS_ICC_SRE_EL2
-       orr     x0, x0, #ICC_SRE_EL2_SRE        // Set ICC_SRE_EL2.SRE==1
-       orr     x0, x0, #ICC_SRE_EL2_ENABLE     // Set ICC_SRE_EL2.Enable==1
-       msr_s   SYS_ICC_SRE_EL2, x0
-       isb                                     // Make sure SRE is now set
-       mrs_s   x0, SYS_ICC_SRE_EL2             // Read SRE back,
-       tbz     x0, #0, 3f                      // and check that it sticks
-       msr_s   SYS_ICH_HCR_EL2, xzr            // Reset ICC_HCR_EL2 to defaults
-
-3:
-#endif
-
-       /* Populate ID registers. */
-       mrs     x0, midr_el1
-       mrs     x1, mpidr_el1
-       msr     vpidr_el2, x0
-       msr     vmpidr_el2, x1
-
-#ifdef CONFIG_COMPAT
-       msr     hstr_el2, xzr                   // Disable CP15 traps to EL2
-#endif
-
-       /* EL2 debug */
-       mrs     x1, id_aa64dfr0_el1
-       sbfx    x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
-       cmp     x0, #1
-       b.lt    4f                              // Skip if no PMU present
-       mrs     x0, pmcr_el0                    // Disable debug access traps
-       ubfx    x0, x0, #11, #5                 // to EL2 and allow access to
-4:
-       csel    x3, xzr, x0, lt                 // all PMU counters from EL1
-
-       /* Statistical profiling */
-       ubfx    x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
-       cbz     x0, 7f                          // Skip if SPE not present
-       cbnz    x2, 6f                          // VHE?
-       mrs_s   x4, SYS_PMBIDR_EL1              // If SPE available at EL2,
-       and     x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT)
-       cbnz    x4, 5f                          // then permit sampling of physical
-       mov     x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \
-                     1 << SYS_PMSCR_EL2_PA_SHIFT)
-       msr_s   SYS_PMSCR_EL2, x4               // addresses and physical counter
-5:
-       mov     x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
-       orr     x3, x3, x1                      // If we don't have VHE, then
-       b       7f                              // use EL1&0 translation.
-6:                                             // For VHE, use EL2 translation
-       orr     x3, x3, #MDCR_EL2_TPMS          // and disable access from EL1
-7:
-       msr     mdcr_el2, x3                    // Configure debug traps
-
-       /* LORegions */
-       mrs     x1, id_aa64mmfr1_el1
-       ubfx    x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
-       cbz     x0, 1f
-       msr_s   SYS_LORC_EL1, xzr
-1:
-
-       /* Stage-2 translation */
-       msr     vttbr_el2, xzr
-
-       cbz     x2, install_el2_stub
+       init_el2_state vhe
 
        isb
+
        mov_q   x0, INIT_PSTATE_EL2
        msr     spsr_el2, x0
        msr     elr_el2, lr
        mov     w0, #BOOT_CPU_MODE_EL2
        eret
 
-SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL)
+SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL)
        /*
         * When VHE is not in use, early init of EL2 and EL1 needs to be
         * done here.
-        * When VHE _is_ in use, EL1 will not be used in the host and
-        * requires no configuration, and all non-hyp-specific EL2 setup
-        * will be done via the _EL1 system register aliases in __cpu_setup.
         */
        mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
        msr     sctlr_el1, x0
 
-       /* Coprocessor traps. */
-       mov     x0, #0x33ff
-       msr     cptr_el2, x0                    // Disable copro. traps to EL2
-
-       /* SVE register access */
-       mrs     x1, id_aa64pfr0_el1
-       ubfx    x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
-       cbz     x1, 7f
-
-       bic     x0, x0, #CPTR_EL2_TZ            // Also disable SVE traps
-       msr     cptr_el2, x0                    // Disable copro. traps to EL2
+       mov_q   x0, HCR_HOST_NVHE_FLAGS
+       msr     hcr_el2, x0
        isb
-       mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector
-       msr_s   SYS_ZCR_EL2, x1                 // length for EL1.
+
+       init_el2_state nvhe
 
        /* Hypervisor stub */
-7:     adr_l   x0, __hyp_stub_vectors
+       adr_l   x0, __hyp_stub_vectors
        msr     vbar_el2, x0
-
        isb
-       mov     x0, #INIT_PSTATE_EL1
-       msr     spsr_el2, x0
+
        msr     elr_el2, lr
        mov     w0, #BOOT_CPU_MODE_EL2
        eret
index 4200377..9c9f47e 100644 (file)
@@ -371,6 +371,11 @@ static void swsusp_mte_restore_tags(void)
                unsigned long pfn = xa_state.xa_index;
                struct page *page = pfn_to_online_page(pfn);
 
+               /*
+                * It is not required to invoke page_kasan_tag_reset(page)
+                * at this point since the tags stored in page->flags are
+                * already restored.
+                */
                mte_restore_page_tags(page_address(page), tags);
 
                mte_free_tag_storage(tags);
index c615b28..f676243 100644
@@ -37,7 +37,7 @@ __efistub_strncmp             = __pi_strncmp;
 __efistub_strrchr              = __pi_strrchr;
 __efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc;
 
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 __efistub___memcpy             = __pi_memcpy;
 __efistub___memmove            = __pi_memmove;
 __efistub___memset             = __pi_memset;
@@ -64,13 +64,12 @@ __efistub__ctype            = _ctype;
 /* Alternative callbacks for init-time patching of nVHE hyp code. */
 KVM_NVHE_ALIAS(kvm_patch_vector_branch);
 KVM_NVHE_ALIAS(kvm_update_va_mask);
+KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset);
+KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
 
 /* Global kernel state accessed by nVHE hyp code. */
 KVM_NVHE_ALIAS(kvm_vgic_global_state);
 
-/* Kernel constant needed to compute idmap addresses. */
-KVM_NVHE_ALIAS(kimage_voffset);
-
 /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
 KVM_NVHE_ALIAS(__hyp_panic_string);
 KVM_NVHE_ALIAS(panic);
@@ -78,9 +77,6 @@ KVM_NVHE_ALIAS(panic);
 /* Vectors installed by hyp-init on reset HVC. */
 KVM_NVHE_ALIAS(__hyp_stub_vectors);
 
-/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */
-KVM_NVHE_ALIAS(idmap_t0sz);
-
 /* Kernel symbol used by icache_is_vpipt(). */
 KVM_NVHE_ALIAS(__icache_flags);
 
@@ -103,6 +99,9 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities);
 KVM_NVHE_ALIAS(__start___kvm_ex_table);
 KVM_NVHE_ALIAS(__stop___kvm_ex_table);
 
+/* Array containing bases of nVHE per-CPU memory regions. */
+KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
+
 #endif /* CONFIG_KVM */
 
 #endif /* __ARM64_KERNEL_IMAGE_VARS_H */
index 0921aa1..1c74c45 100644
@@ -161,7 +161,8 @@ u64 __init kaslr_early_init(u64 dt_phys)
        /* use the top 16 bits to randomize the linear region */
        memstart_offset_seed = seed >> 48;
 
-       if (IS_ENABLED(CONFIG_KASAN))
+       if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
+           IS_ENABLED(CONFIG_KASAN_SW_TAGS))
                /*
                 * KASAN does not expect the module region to intersect the
                 * vmalloc region, since shadow memory is allocated for each
index 2a1ad95..fe21e0f 100644
@@ -30,7 +30,8 @@ void *module_alloc(unsigned long size)
        if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
                gfp_mask |= __GFP_NOWARN;
 
-       if (IS_ENABLED(CONFIG_KASAN))
+       if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
+           IS_ENABLED(CONFIG_KASAN_SW_TAGS))
                /* don't exceed the static module region - see below */
                module_alloc_end = MODULES_END;
 
@@ -39,7 +40,8 @@ void *module_alloc(unsigned long size)
                                NUMA_NO_NODE, __builtin_return_address(0));
 
        if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
-           !IS_ENABLED(CONFIG_KASAN))
+           !IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+           !IS_ENABLED(CONFIG_KASAN_SW_TAGS))
                /*
                 * KASAN can only deal with module allocations being served
                 * from the reserved module region, since the remainder of
index ef15c8a..dc9ada6 100644
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/thread_info.h>
+#include <linux/types.h>
 #include <linux/uio.h>
 
+#include <asm/barrier.h>
 #include <asm/cpufeature.h>
 #include <asm/mte.h>
+#include <asm/mte-kasan.h>
 #include <asm/ptrace.h>
 #include <asm/sysreg.h>
 
+u64 gcr_kernel_excl __ro_after_init;
+
 static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
 {
        pte_t old_pte = READ_ONCE(*ptep);
@@ -31,6 +36,15 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
                        return;
        }
 
+       page_kasan_tag_reset(page);
+       /*
+        * We need smp_wmb() between setting the flags and clearing the
+        * tags: if another thread reads page->flags and builds a tagged
+        * address out of it, there is an address dependency ordering its
+        * memory access, but nothing on the current thread guarantees
+        * that the new page->flags are visible before the tags are updated.
+        */
+       smp_wmb();
        mte_clear_page_tags(page_address(page));
 }
 
@@ -72,6 +86,78 @@ int memcmp_pages(struct page *page1, struct page *page2)
        return ret;
 }
 
+u8 mte_get_mem_tag(void *addr)
+{
+       if (!system_supports_mte())
+               return 0xFF;
+
+       asm(__MTE_PREAMBLE "ldg %0, [%0]"
+           : "+r" (addr));
+
+       return mte_get_ptr_tag(addr);
+}
+
+u8 mte_get_random_tag(void)
+{
+       void *addr;
+
+       if (!system_supports_mte())
+               return 0xFF;
+
+       asm(__MTE_PREAMBLE "irg %0, %0"
+           : "+r" (addr));
+
+       return mte_get_ptr_tag(addr);
+}
+
+void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
+{
+       void *ptr = addr;
+
+       if (!system_supports_mte() || size == 0)
+               return addr;
+
+       /* Make sure that size is MTE granule aligned. */
+       WARN_ON(size & (MTE_GRANULE_SIZE - 1));
+
+       /* Make sure that the address is MTE granule aligned. */
+       WARN_ON((u64)addr & (MTE_GRANULE_SIZE - 1));
+
+       tag = 0xF0 | tag;
+       ptr = (void *)__tag_set(ptr, tag);
+
+       mte_assign_mem_tag_range(ptr, size);
+
+       return ptr;
+}
+
+void mte_init_tags(u64 max_tag)
+{
+       static bool gcr_kernel_excl_initialized;
+
+       if (!gcr_kernel_excl_initialized) {
+               /*
+                * The format of the tags in KASAN is 0xFF and in MTE is 0xF.
+                * This conversion extracts an MTE tag from a KASAN tag.
+                */
+               u64 incl = GENMASK(FIELD_GET(MTE_TAG_MASK >> MTE_TAG_SHIFT,
+                                            max_tag), 0);
+
+               gcr_kernel_excl = ~incl & SYS_GCR_EL1_EXCL_MASK;
+               gcr_kernel_excl_initialized = true;
+       }
+
+       /* Enable the kernel exclude mask for random tags generation. */
+       write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1);
+}
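
A worked instance of the include-to-exclude conversion above, assuming
SYS_GCR_EL1_EXCL_MASK covers the 16-bit GCR_EL1.Exclude field:

	/*
	 * max_tag = 0xFF (KASAN)  ->  FIELD_GET(0xF, 0xFF) = 0xF (MTE tag)
	 * incl            = GENMASK(0xF, 0)   = 0xFFFF
	 * gcr_kernel_excl = ~0xFFFF & 0xFFFF  = 0
	 * i.e. nothing is excluded: IRG may generate any of the 16 tags.
	 */
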
+
+void mte_enable_kernel(void)
+{
+       /* Enable MTE Sync Mode for EL1. */
+       sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_SYNC);
+       isb();
+}
+
 static void update_sctlr_el1_tcf0(u64 tcf0)
 {
        /* ISB required for the kernel uaccess routines */
@@ -92,23 +178,26 @@ static void set_sctlr_el1_tcf0(u64 tcf0)
        preempt_enable();
 }
 
-static void update_gcr_el1_excl(u64 incl)
+static void update_gcr_el1_excl(u64 excl)
 {
-       u64 excl = ~incl & SYS_GCR_EL1_EXCL_MASK;
 
        /*
-        * Note that 'incl' is an include mask (controlled by the user via
-        * prctl()) while GCR_EL1 accepts an exclude mask.
+        * Note that the mask controlled by the user via prctl() is an
+        * include while GCR_EL1 accepts an exclude mask.
         * No need for ISB since this only affects EL0 currently, implicit
         * with ERET.
         */
        sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl);
 }
 
-static void set_gcr_el1_excl(u64 incl)
+static void set_gcr_el1_excl(u64 excl)
 {
-       current->thread.gcr_user_incl = incl;
-       update_gcr_el1_excl(incl);
+       current->thread.gcr_user_excl = excl;
+
+       /*
+        * SYS_GCR_EL1 will be set to the current->thread.gcr_user_excl
+        * value by mte_set_user_gcr() in kernel_exit.
+        */
 }
 
 void flush_mte_state(void)
@@ -123,7 +212,7 @@ void flush_mte_state(void)
        /* disable tag checking */
        set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
        /* reset tag generation mask */
-       set_gcr_el1_excl(0);
+       set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK);
 }
 
 void mte_thread_switch(struct task_struct *next)
@@ -134,7 +223,6 @@ void mte_thread_switch(struct task_struct *next)
        /* avoid expensive SCTLR_EL1 accesses if no change */
        if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
                update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
-       update_gcr_el1_excl(next->thread.gcr_user_incl);
 }
 
 void mte_suspend_exit(void)
@@ -142,13 +230,14 @@ void mte_suspend_exit(void)
        if (!system_supports_mte())
                return;
 
-       update_gcr_el1_excl(current->thread.gcr_user_incl);
+       update_gcr_el1_excl(gcr_kernel_excl);
 }
 
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
        u64 tcf0;
-       u64 gcr_incl = (arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT;
+       u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
+                      SYS_GCR_EL1_EXCL_MASK;
 
        if (!system_supports_mte())
                return 0;
@@ -169,10 +258,10 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 
        if (task != current) {
                task->thread.sctlr_tcf0 = tcf0;
-               task->thread.gcr_user_incl = gcr_incl;
+               task->thread.gcr_user_excl = gcr_excl;
        } else {
                set_sctlr_el1_tcf0(tcf0);
-               set_gcr_el1_excl(gcr_incl);
+               set_gcr_el1_excl(gcr_excl);
        }
 
        return 0;
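
A worked instance of the conversion above, under the same assumption
that SYS_GCR_EL1_EXCL_MASK is 0xffff (the include mask arrives via
prctl(PR_SET_TAGGED_ADDR_CTRL)):

	/*
	 * include mask 0x0001 (tag 0 only):
	 *   gcr_excl = ~0x0001 & 0xffff = 0xfffe
	 * so IRG at EL0 can only ever produce tag 0.
	 */
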
@@ -181,11 +270,12 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 long get_mte_ctrl(struct task_struct *task)
 {
        unsigned long ret;
+       u64 incl = ~task->thread.gcr_user_excl & SYS_GCR_EL1_EXCL_MASK;
 
        if (!system_supports_mte())
                return 0;
 
-       ret = task->thread.gcr_user_incl << PR_MTE_TAG_SHIFT;
+       ret = incl << PR_MTE_TAG_SHIFT;
 
        switch (task->thread.sctlr_tcf0) {
        case SCTLR_EL1_TCF0_NONE:
index 4c25c00..902e408 100644
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Handle detection, reporting and mitigation of Spectre v1, v2 and v4, as
+ * Handle detection, reporting and mitigation of Spectre v1, v2, v3a and v4, as
  * detailed at:
  *
  *   https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability
@@ -27,6 +27,7 @@
 #include <asm/insn.h>
 #include <asm/spectre.h>
 #include <asm/traps.h>
+#include <asm/virt.h>
 
 /*
  * We try to ensure that the mitigation state can never change as the result of
@@ -171,72 +172,26 @@ bool has_spectre_v2(const struct arm64_cpu_capabilities *entry, int scope)
        return true;
 }
 
-DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-
 enum mitigation_state arm64_get_spectre_v2_state(void)
 {
        return spectre_v2_state;
 }
 
-#ifdef CONFIG_KVM
-#include <asm/cacheflush.h>
-#include <asm/kvm_asm.h>
-
-atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
-
-static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
-                               const char *hyp_vecs_end)
-{
-       void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K);
-       int i;
-
-       for (i = 0; i < SZ_2K; i += 0x80)
-               memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
-
-       __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
-}
+DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
 
 static void install_bp_hardening_cb(bp_hardening_cb_t fn)
 {
-       static DEFINE_RAW_SPINLOCK(bp_lock);
-       int cpu, slot = -1;
-       const char *hyp_vecs_start = __smccc_workaround_1_smc;
-       const char *hyp_vecs_end = __smccc_workaround_1_smc +
-                                  __SMCCC_WORKAROUND_1_SMC_SZ;
+       __this_cpu_write(bp_hardening_data.fn, fn);
 
        /*
         * Vinz Clortho takes the hyp_vecs start/end "keys" at
         * the door when we're a guest. Skip the hyp-vectors work.
         */
-       if (!is_hyp_mode_available()) {
-               __this_cpu_write(bp_hardening_data.fn, fn);
+       if (!is_hyp_mode_available())
                return;
-       }
-
-       raw_spin_lock(&bp_lock);
-       for_each_possible_cpu(cpu) {
-               if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
-                       slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
-                       break;
-               }
-       }
-
-       if (slot == -1) {
-               slot = atomic_inc_return(&arm64_el2_vector_last_slot);
-               BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
-               __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
-       }
 
-       __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
-       __this_cpu_write(bp_hardening_data.fn, fn);
-       raw_spin_unlock(&bp_lock);
-}
-#else
-static void install_bp_hardening_cb(bp_hardening_cb_t fn)
-{
-       __this_cpu_write(bp_hardening_data.fn, fn);
+       __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT);
 }
-#endif /* CONFIG_KVM */
 
 static void call_smc_arch_workaround_1(void)
 {
@@ -318,6 +273,33 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
 }
 
 /*
+ * Spectre-v3a.
+ *
+ * Phew, there's not an awful lot to do here! We just instruct EL2 to use
+ * an indirect trampoline for the hyp vectors so that guests can't read
+ * VBAR_EL2 to defeat randomisation of the hypervisor VA layout.
+ */
+bool has_spectre_v3a(const struct arm64_cpu_capabilities *entry, int scope)
+{
+       static const struct midr_range spectre_v3a_unsafe_list[] = {
+               MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+               MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+               {},
+       };
+
+       WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+       return is_midr_in_range_list(read_cpuid_id(), spectre_v3a_unsafe_list);
+}
+
+void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
+{
+       struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
+
+       if (this_cpu_has_cap(ARM64_SPECTRE_V3A))
+               data->slot += HYP_VECTOR_INDIRECT;
+}
+
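Assuming the vector slots are declared in the order DIRECT,
SPECTRE_DIRECT, INDIRECT, SPECTRE_INDIRECT, the `+= HYP_VECTOR_INDIRECT`
above composes with the Spectre-v2 selection as follows:

	/*
	 * neither   -> HYP_VECTOR_DIRECT
	 * v2 only   -> HYP_VECTOR_SPECTRE_DIRECT
	 * v3a only  -> DIRECT + INDIRECT         = HYP_VECTOR_INDIRECT
	 * v2 + v3a  -> SPECTRE_DIRECT + INDIRECT = HYP_VECTOR_SPECTRE_INDIRECT
	 */
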
+/*
  * Spectre v4.
  *
  * If you thought Spectre v2 was nasty, wait until you see this mess. A CPU is
index 1a57a76..c18aacd 100644
@@ -276,7 +276,7 @@ arch_initcall(reserve_memblock_reserved_regions);
 
 u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
-u64 cpu_logical_map(int cpu)
+u64 cpu_logical_map(unsigned int cpu)
 {
        return __cpu_logical_map[cpu];
 }
@@ -358,7 +358,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
        smp_build_mpidr_hash();
 
        /* Init percpu seeds for random tags after cpus are set up. */
-       kasan_init_tags();
+       kasan_init_sw_tags();
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
        /*
index 4be7f7e..6bdef73 100644
@@ -133,7 +133,7 @@ SYM_FUNC_START(_cpu_resume)
         */
        bl      cpu_do_resume
 
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK
        mov     x0, sp
        bl      kasan_unpoison_task_stack_below
 #endif
index 2499b89..19b1705 100644
@@ -462,6 +462,8 @@ void __init smp_prepare_boot_cpu(void)
        /* Conditionally switch to GIC PMR for interrupt masking */
        if (system_uses_irq_prio_masking())
                init_gic_priority_masking();
+
+       kasan_init_hw_tags();
 }
 
 static u64 __init of_get_cpu_mpidr(struct device_node *dn)
index c8308be..f6faa69 100644
@@ -314,7 +314,7 @@ void topology_scale_freq_tick(void)
 
        if (unlikely(core_cnt <= prev_core_cnt ||
                     const_cnt <= prev_const_cnt))
-               goto store_and_exit;
+               return;
 
        /*
         *          /\core    arch_max_freq_scale
@@ -331,10 +331,6 @@ void topology_scale_freq_tick(void)
 
        scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
        this_cpu_write(freq_scale, (unsigned long)scale);
-
-store_and_exit:
-       this_cpu_write(arch_core_cycles_prev, core_cnt);
-       this_cpu_write(arch_const_cycles_prev, const_cnt);
 }
 
 #ifdef CONFIG_ACPI_CPPC_LIB
index 5d5857c..4c0b0c8 100644
@@ -30,6 +30,13 @@ jiffies = jiffies_64;
        *(__kvm_ex_table)                                       \
        __stop___kvm_ex_table = .;
 
+#define HYPERVISOR_DATA_SECTIONS                               \
+       HYP_SECTION_NAME(.data..ro_after_init) : {              \
+               __hyp_data_ro_after_init_start = .;             \
+               *(HYP_SECTION_NAME(.data..ro_after_init))       \
+               __hyp_data_ro_after_init_end = .;               \
+       }
+
 #define HYPERVISOR_PERCPU_SECTION                              \
        . = ALIGN(PAGE_SIZE);                                   \
        HYP_SECTION_NAME(.data..percpu) : {                     \
@@ -37,6 +44,7 @@ jiffies = jiffies_64;
        }
 #else /* CONFIG_KVM */
 #define HYPERVISOR_EXTABLE
+#define HYPERVISOR_DATA_SECTIONS
 #define HYPERVISOR_PERCPU_SECTION
 #endif
 
@@ -232,6 +240,8 @@ SECTIONS
        _sdata = .;
        RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
 
+       HYPERVISOR_DATA_SECTIONS
+
        /*
         * Data written with the MMU off but read with the MMU on requires
         * cache lines to be invalidated, discarding up to a Cache Writeback
index 1504c81..60fd181 100644
@@ -13,10 +13,10 @@ obj-$(CONFIG_KVM) += hyp/
 kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
         $(KVM)/vfio.o $(KVM)/irqchip.o \
         arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
-        inject_fault.o regmap.o va_layout.o handle_exit.o \
+        inject_fault.o va_layout.o handle_exit.o \
         guest.o debug.o reset.o sys_regs.o \
         vgic-sys-reg-v3.o fpsimd.o pmu.o \
-        aarch32.o arch_timer.o \
+        arch_timer.o \
         vgic/vgic.o vgic/vgic-init.o \
         vgic/vgic-irqfd.o vgic/vgic-v2.o \
         vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c
deleted file mode 100644
index 40a62a9..0000000
+++ /dev/null
@@ -1,232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * (not much of an) Emulation layer for 32bit guests.
- *
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * based on arch/arm/kvm/emulate.c
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/bits.h>
-#include <linux/kvm_host.h>
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-
-#define DFSR_FSC_EXTABT_LPAE   0x10
-#define DFSR_FSC_EXTABT_nLPAE  0x08
-#define DFSR_LPAE              BIT(9)
-
-/*
- * Table taken from ARMv8 ARM DDI0487B-B, table G1-10.
- */
-static const u8 return_offsets[8][2] = {
-       [0] = { 0, 0 },         /* Reset, unused */
-       [1] = { 4, 2 },         /* Undefined */
-       [2] = { 0, 0 },         /* SVC, unused */
-       [3] = { 4, 4 },         /* Prefetch abort */
-       [4] = { 8, 8 },         /* Data abort */
-       [5] = { 0, 0 },         /* HVC, unused */
-       [6] = { 4, 4 },         /* IRQ, unused */
-       [7] = { 4, 4 },         /* FIQ, unused */
-};
-
-static bool pre_fault_synchronize(struct kvm_vcpu *vcpu)
-{
-       preempt_disable();
-       if (vcpu->arch.sysregs_loaded_on_cpu) {
-               kvm_arch_vcpu_put(vcpu);
-               return true;
-       }
-
-       preempt_enable();
-       return false;
-}
-
-static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded)
-{
-       if (loaded) {
-               kvm_arch_vcpu_load(vcpu, smp_processor_id());
-               preempt_enable();
-       }
-}
-
-/*
- * When an exception is taken, most CPSR fields are left unchanged in the
- * handler. However, some are explicitly overridden (e.g. M[4:0]).
- *
- * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with
- * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was
- * obsoleted by the ARMv7 virtualization extensions and is RES0.
- *
- * For the SPSR layout seen from AArch32, see:
- * - ARM DDI 0406C.d, page B1-1148
- * - ARM DDI 0487E.a, page G8-6264
- *
- * For the SPSR_ELx layout for AArch32 seen from AArch64, see:
- * - ARM DDI 0487E.a, page C5-426
- *
- * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from
- * MSB to LSB.
- */
-static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode)
-{
-       u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
-       unsigned long old, new;
-
-       old = *vcpu_cpsr(vcpu);
-       new = 0;
-
-       new |= (old & PSR_AA32_N_BIT);
-       new |= (old & PSR_AA32_Z_BIT);
-       new |= (old & PSR_AA32_C_BIT);
-       new |= (old & PSR_AA32_V_BIT);
-       new |= (old & PSR_AA32_Q_BIT);
-
-       // CPSR.IT[7:0] are set to zero upon any exception
-       // See ARM DDI 0487E.a, section G1.12.3
-       // See ARM DDI 0406C.d, section B1.8.3
-
-       new |= (old & PSR_AA32_DIT_BIT);
-
-       // CPSR.SSBS is set to SCTLR.DSSBS upon any exception
-       // See ARM DDI 0487E.a, page G8-6244
-       if (sctlr & BIT(31))
-               new |= PSR_AA32_SSBS_BIT;
-
-       // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0
-       // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented
-       // See ARM DDI 0487E.a, page G8-6246
-       new |= (old & PSR_AA32_PAN_BIT);
-       if (!(sctlr & BIT(23)))
-               new |= PSR_AA32_PAN_BIT;
-
-       // SS does not exist in AArch32, so ignore
-
-       // CPSR.IL is set to zero upon any exception
-       // See ARM DDI 0487E.a, page G1-5527
-
-       new |= (old & PSR_AA32_GE_MASK);
-
-       // CPSR.IT[7:0] are set to zero upon any exception
-       // See prior comment above
-
-       // CPSR.E is set to SCTLR.EE upon any exception
-       // See ARM DDI 0487E.a, page G8-6245
-       // See ARM DDI 0406C.d, page B4-1701
-       if (sctlr & BIT(25))
-               new |= PSR_AA32_E_BIT;
-
-       // CPSR.A is unchanged upon an exception to Undefined, Supervisor
-       // CPSR.A is set upon an exception to other modes
-       // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
-       // See ARM DDI 0406C.d, page B1-1182
-       new |= (old & PSR_AA32_A_BIT);
-       if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC)
-               new |= PSR_AA32_A_BIT;
-
-       // CPSR.I is set upon any exception
-       // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
-       // See ARM DDI 0406C.d, page B1-1182
-       new |= PSR_AA32_I_BIT;
-
-       // CPSR.F is set upon an exception to FIQ
-       // CPSR.F is unchanged upon an exception to other modes
-       // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
-       // See ARM DDI 0406C.d, page B1-1182
-       new |= (old & PSR_AA32_F_BIT);
-       if (mode == PSR_AA32_MODE_FIQ)
-               new |= PSR_AA32_F_BIT;
-
-       // CPSR.T is set to SCTLR.TE upon any exception
-       // See ARM DDI 0487E.a, page G8-5514
-       // See ARM DDI 0406C.d, page B1-1181
-       if (sctlr & BIT(30))
-               new |= PSR_AA32_T_BIT;
-
-       new |= mode;
-
-       return new;
-}
-
-static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
-{
-       unsigned long spsr = *vcpu_cpsr(vcpu);
-       bool is_thumb = (spsr & PSR_AA32_T_BIT);
-       u32 return_offset = return_offsets[vect_offset >> 2][is_thumb];
-       u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
-
-       *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode);
-
-       /* Note: These now point to the banked copies */
-       vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr));
-       *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
-
-       /* Branch to exception vector */
-       if (sctlr & (1 << 13))
-               vect_offset += 0xffff0000;
-       else /* always have security exceptions */
-               vect_offset += vcpu_cp15(vcpu, c12_VBAR);
-
-       *vcpu_pc(vcpu) = vect_offset;
-}
-
-void kvm_inject_undef32(struct kvm_vcpu *vcpu)
-{
-       bool loaded = pre_fault_synchronize(vcpu);
-
-       prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4);
-       post_fault_synchronize(vcpu, loaded);
-}
-
-/*
- * Modelled after TakeDataAbortException() and TakePrefetchAbortException
- * pseudocode.
- */
-static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
-                        unsigned long addr)
-{
-       u32 vect_offset;
-       u32 *far, *fsr;
-       bool is_lpae;
-       bool loaded;
-
-       loaded = pre_fault_synchronize(vcpu);
-
-       if (is_pabt) {
-               vect_offset = 12;
-               far = &vcpu_cp15(vcpu, c6_IFAR);
-               fsr = &vcpu_cp15(vcpu, c5_IFSR);
-       } else { /* !iabt */
-               vect_offset = 16;
-               far = &vcpu_cp15(vcpu, c6_DFAR);
-               fsr = &vcpu_cp15(vcpu, c5_DFSR);
-       }
-
-       prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset);
-
-       *far = addr;
-
-       /* Give the guest an IMPLEMENTATION DEFINED exception */
-       is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
-       if (is_lpae) {
-               *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE;
-       } else {
-               /* no need to shuffle FS[4] into DFSR[10] as its 0 */
-               *fsr = DFSR_FSC_EXTABT_nLPAE;
-       }
-
-       post_fault_synchronize(vcpu, loaded);
-}
-
-void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr)
-{
-       inject_abt32(vcpu, false, addr);
-}
-
-void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr)
-{
-       inject_abt32(vcpu, true, addr);
-}
index c0ffb01..6e637d2 100644
@@ -19,6 +19,7 @@
 #include <linux/kvm_irqfd.h>
 #include <linux/irqbypass.h>
 #include <linux/sched/stat.h>
+#include <linux/psci.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -35,7 +36,6 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_emulate.h>
-#include <asm/kvm_coproc.h>
 #include <asm/sections.h>
 
 #include <kvm/arm_hypercalls.h>
 __asm__(".arch_extension       virt");
 #endif
 
+static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
+DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+
 DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
 
 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
 unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
+DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 /* The VMID used in the VTTBR */
 static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
@@ -61,6 +65,10 @@ static bool vgic_present;
 static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
 DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
+extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS];
+extern u32 kvm_nvhe_sym(kvm_host_psci_version);
+extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids);
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
        return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -102,7 +110,7 @@ static int kvm_arm_default_max_vcpus(void)
        return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
 }
 
-static void set_default_csv2(struct kvm *kvm)
+static void set_default_spectre(struct kvm *kvm)
 {
        /*
         * The default is to expose CSV2 == 1 if the HW isn't affected.
@@ -114,6 +122,8 @@ static void set_default_csv2(struct kvm *kvm)
         */
        if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED)
                kvm->arch.pfr0_csv2 = 1;
+       if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED)
+               kvm->arch.pfr0_csv3 = 1;
 }
 
 /**
@@ -141,7 +151,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        /* The maximum number of VCPUs is limited by the host's GIC model */
        kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
 
-       set_default_csv2(kvm);
+       set_default_spectre(kvm);
 
        return ret;
 out_free_stage2_pgd:
@@ -198,6 +208,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
        case KVM_CAP_ARM_NISV_TO_USER:
        case KVM_CAP_ARM_INJECT_EXT_DABT:
+       case KVM_CAP_SET_GUEST_DEBUG:
+       case KVM_CAP_VCPU_ATTRIBUTES:
                r = 1;
                break;
        case KVM_CAP_ARM_SET_DEVICE_ADDR:
@@ -229,10 +241,35 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_STEAL_TIME:
                r = kvm_arm_pvtime_supported();
                break;
-       default:
-               r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
+       case KVM_CAP_ARM_EL1_32BIT:
+               r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1);
+               break;
+       case KVM_CAP_GUEST_DEBUG_HW_BPS:
+               r = get_num_brps();
+               break;
+       case KVM_CAP_GUEST_DEBUG_HW_WPS:
+               r = get_num_wrps();
+               break;
+       case KVM_CAP_ARM_PMU_V3:
+               r = kvm_arm_support_pmu_v3();
+               break;
+       case KVM_CAP_ARM_INJECT_SERROR_ESR:
+               r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
                break;
+       case KVM_CAP_ARM_VM_IPA_SIZE:
+               r = get_kvm_ipa_limit();
+               break;
+       case KVM_CAP_ARM_SVE:
+               r = system_supports_sve();
+               break;
+       case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+       case KVM_CAP_ARM_PTRAUTH_GENERIC:
+               r = system_has_full_ptr_auth();
+               break;
+       default:
+               r = 0;
        }
+
        return r;
 }
 
@@ -1311,47 +1348,52 @@ static unsigned long nvhe_percpu_order(void)
        return size ? get_order(size) : 0;
 }
 
-static int kvm_map_vectors(void)
+/* A lookup table holding the hypervisor VA for each vector slot */
+static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
+
+static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot)
 {
-       /*
-        * SV2  = ARM64_SPECTRE_V2
-        * HEL2 = ARM64_HARDEN_EL2_VECTORS
-        *
-        * !SV2 + !HEL2 -> use direct vectors
-        *  SV2 + !HEL2 -> use hardened vectors in place
-        * !SV2 +  HEL2 -> allocate one vector slot and use exec mapping
-        *  SV2 +  HEL2 -> use hardened vectors and use exec mapping
-        */
-       if (cpus_have_const_cap(ARM64_SPECTRE_V2)) {
-               __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs);
-               __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
-       }
+       return slot - (slot != HYP_VECTOR_DIRECT);
+}
 
-       if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
-               phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs);
-               unsigned long size = __BP_HARDEN_HYP_VECS_SZ;
+static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot)
+{
+       int idx = __kvm_vector_slot2idx(slot);
 
-               /*
-                * Always allocate a spare vector slot, as we don't
-                * know yet which CPUs have a BP hardening slot that
-                * we can reuse.
-                */
-               __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot);
-               BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS);
-               return create_hyp_exec_mappings(vect_pa, size,
-                                               &__kvm_bp_vect_base);
+       hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K);
+}
+
+static int kvm_init_vector_slots(void)
+{
+       int err;
+       void *base;
+
+       base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
+       kvm_init_vector_slot(base, HYP_VECTOR_DIRECT);
+
+       base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
+       kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
+
+       if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
+               return 0;
+
+       if (!has_vhe()) {
+               err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
+                                              __BP_HARDEN_HYP_VECS_SZ, &base);
+               if (err)
+                       return err;
        }
 
+       kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT);
+       kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT);
        return 0;
 }
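
For reference, the slot-to-address mapping produced above, derived
purely from the __kvm_vector_slot2idx() arithmetic (on nVHE the
__bp_harden_hyp_vecs base may be its exec mapping):

	/*
	 * HYP_VECTOR_DIRECT           (0) -> __kvm_hyp_vector
	 * HYP_VECTOR_SPECTRE_DIRECT   (1) -> __bp_harden_hyp_vecs
	 * HYP_VECTOR_INDIRECT         (2) -> __bp_harden_hyp_vecs + SZ_2K
	 * HYP_VECTOR_SPECTRE_INDIRECT (3) -> __bp_harden_hyp_vecs + 2 * SZ_2K
	 */
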
 
 static void cpu_init_hyp_mode(void)
 {
-       phys_addr_t pgd_ptr;
-       unsigned long hyp_stack_ptr;
-       unsigned long vector_ptr;
-       unsigned long tpidr_el2;
+       struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params);
        struct arm_smccc_res res;
+       unsigned long tcr;
 
        /* Switch from the HYP stub to our own HYP init vector */
        __hyp_set_vectors(kvm_get_idmap_vector());
@@ -1361,13 +1403,38 @@ static void cpu_init_hyp_mode(void)
         * kernel's mapping to the linear mapping, and store it in tpidr_el2
         * so that we can use adr_l to access per-cpu variables in EL2.
         */
-       tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) -
-                   (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
+       params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) -
+                           (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
+
+       params->mair_el2 = read_sysreg(mair_el1);
+
+       /*
+        * The ID map may be configured to use an extended virtual address
+        * range. This is only the case if system RAM is out of range for the
+        * currently configured page size and VA_BITS, in which case we will
+        * also need the extended virtual range for the HYP ID map, or we won't
+        * be able to enable the EL2 MMU.
+        *
+        * However, at EL2, there is only one TTBR register, and we can't switch
+        * between translation tables *and* update TCR_EL2.T0SZ at the same
+        * time. Bottom line: we need to use the extended range with *both* our
+        * translation tables.
+        *
+        * So use the same T0SZ value we use for the ID map.
+        */
+       tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1;
+       tcr &= ~TCR_T0SZ_MASK;
+       tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
+       params->tcr_el2 = tcr;
+
+       params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE);
+       params->pgd_pa = kvm_mmu_get_httbr();
 
-       pgd_ptr = kvm_mmu_get_httbr();
-       hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE;
-       hyp_stack_ptr = kern_hyp_va(hyp_stack_ptr);
-       vector_ptr = (unsigned long)kern_hyp_va(kvm_ksym_ref(__kvm_hyp_host_vector));
+       /*
+        * Flush the init params from the data cache because the struct will
+        * be read while the MMU is off.
+        */
+       kvm_flush_dcache_to_poc(params, sizeof(*params));
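
The stores above imply roughly this shape for struct
kvm_nvhe_init_params, which the EL2 init code reads with the MMU off (a
sketch inferred from this hunk, not the actual definition):

	struct kvm_nvhe_init_params {
		unsigned long mair_el2;
		unsigned long tcr_el2;
		unsigned long tpidr_el2;
		unsigned long stack_hyp_va;
		phys_addr_t pgd_pa;
	};
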
 
        /*
         * Call initialization code, and switch to the full blown HYP code.
@@ -1376,8 +1443,7 @@ static void cpu_init_hyp_mode(void)
         * cpus_have_const_cap() wrapper.
         */
        BUG_ON(!system_capabilities_finalized());
-       arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init),
-                         pgd_ptr, tpidr_el2, hyp_stack_ptr, vector_ptr, &res);
+       arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res);
        WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
 
        /*
@@ -1396,13 +1462,40 @@ static void cpu_hyp_reset(void)
                __hyp_reset_vectors();
 }
 
+/*
+ * EL2 vectors can be mapped and rerouted in a number of ways,
+ * depending on the kernel configuration and CPU present:
+ *
+ * - If the CPU is affected by Spectre-v2, the hardening sequence is
+ *   placed in one of the vector slots, which is executed before jumping
+ *   to the real vectors.
+ *
+ * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot
+ *   containing the hardening sequence is mapped next to the idmap page,
+ *   and executed before jumping to the real vectors.
+ *
+ * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an
+ *   empty slot is selected, mapped next to the idmap page, and
+ *   executed before jumping to the real vectors.
+ *
+ * Note that ARM64_SPECTRE_V3A is somewhat incompatible with
+ * VHE, as we don't have hypervisor-specific mappings. If the system
+ * is VHE and yet selects this capability, it will be ignored.
+ */
+static void cpu_set_hyp_vector(void)
+{
+       struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
+       void *vector = hyp_spectre_vector_selector[data->slot];
+
+       *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
+}
+
 static void cpu_hyp_reinit(void)
 {
        kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
 
        cpu_hyp_reset();
-
-       *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)kvm_get_hyp_vector();
+       cpu_set_hyp_vector();
 
        if (is_kernel_in_hyp_mode())
                kvm_timer_init_vhe();
@@ -1439,7 +1532,8 @@ static void _kvm_arch_hardware_disable(void *discard)
 
 void kvm_arch_hardware_disable(void)
 {
-       _kvm_arch_hardware_disable(NULL);
+       if (!is_protected_kvm_enabled())
+               _kvm_arch_hardware_disable(NULL);
 }
 
 #ifdef CONFIG_CPU_PM
@@ -1482,11 +1576,13 @@ static struct notifier_block hyp_init_cpu_pm_nb = {
 
 static void __init hyp_cpu_pm_init(void)
 {
-       cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
+       if (!is_protected_kvm_enabled())
+               cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
 }
 static void __init hyp_cpu_pm_exit(void)
 {
-       cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
+       if (!is_protected_kvm_enabled())
+               cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
 }
 #else
 static inline void hyp_cpu_pm_init(void)
@@ -1497,6 +1593,36 @@ static inline void hyp_cpu_pm_exit(void)
 }
 #endif
 
+static void init_cpu_logical_map(void)
+{
+       unsigned int cpu;
+
+       /*
+        * Copy the MPIDR <-> logical CPU ID mapping to hyp.
+        * Only copy the set of online CPUs whose features have been checked
+        * against the finalized system capabilities. The hypervisor will not
+        * allow any other CPUs from the `possible` set to boot.
+        */
+       for_each_online_cpu(cpu)
+               kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu);
+}
+
+static bool init_psci_relay(void)
+{
+       /*
+        * If PSCI has not been initialized, protected KVM cannot install
+        * itself on newly booted CPUs.
+        */
+       if (!psci_ops.get_version) {
+               kvm_err("Cannot initialize protected mode without PSCI\n");
+               return false;
+       }
+
+       kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version();
+       kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids();
+       return true;
+}
+
 static int init_common_resources(void)
 {
        return kvm_set_ipa_limit();
@@ -1541,10 +1667,11 @@ static int init_subsystems(void)
                goto out;
 
        kvm_perf_init();
-       kvm_coproc_table_init();
+       kvm_sys_reg_table_init();
 
 out:
-       on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
+       if (err || !is_protected_kvm_enabled())
+               on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
 
        return err;
 }
@@ -1618,6 +1745,14 @@ static int init_hyp_mode(void)
                goto out_err;
        }
 
+       err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
+                                 kvm_ksym_ref(__hyp_data_ro_after_init_end),
+                                 PAGE_HYP_RO);
+       if (err) {
+               kvm_err("Cannot map .hyp.data..ro_after_init section\n");
+               goto out_err;
+       }
+
        err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
                                  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
        if (err) {
@@ -1632,12 +1767,6 @@ static int init_hyp_mode(void)
                goto out_err;
        }
 
-       err = kvm_map_vectors();
-       if (err) {
-               kvm_err("Cannot map vectors\n");
-               goto out_err;
-       }
-
        /*
         * Map the Hyp stack pages
         */
@@ -1667,6 +1796,13 @@ static int init_hyp_mode(void)
                }
        }
 
+       if (is_protected_kvm_enabled()) {
+               init_cpu_logical_map();
+
+               if (!init_psci_relay())
+                       goto out_err;
+       }
+
        return 0;
 
 out_err:
@@ -1781,14 +1917,24 @@ int kvm_arch_init(void *opaque)
                        goto out_err;
        }
 
+       err = kvm_init_vector_slots();
+       if (err) {
+               kvm_err("Cannot initialise vector slots\n");
+               goto out_err;
+       }
+
        err = init_subsystems();
        if (err)
                goto out_hyp;
 
-       if (in_hyp_mode)
+       if (is_protected_kvm_enabled()) {
+               static_branch_enable(&kvm_protected_mode_initialized);
+               kvm_info("Protected nVHE mode initialized successfully\n");
+       } else if (in_hyp_mode) {
                kvm_info("VHE mode initialized successfully\n");
-       else
+       } else {
                kvm_info("Hyp mode initialized successfully\n");
+       }
 
        return 0;
 
@@ -1806,6 +1952,25 @@ void kvm_arch_exit(void)
        kvm_perf_teardown();
 }
 
+static int __init early_kvm_mode_cfg(char *arg)
+{
+       if (!arg)
+               return -EINVAL;
+
+       if (strcmp(arg, "protected") == 0) {
+               kvm_mode = KVM_MODE_PROTECTED;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+early_param("kvm-arm.mode", early_kvm_mode_cfg);
+
+enum kvm_mode kvm_get_mode(void)
+{
+       return kvm_mode;
+}
+
 static int arm_init(void)
 {
        int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
index dfb5218..9bbd30e 100644
@@ -24,7 +24,6 @@
 #include <asm/fpsimd.h>
 #include <asm/kvm.h>
 #include <asm/kvm_emulate.h>
-#include <asm/kvm_coproc.h>
 #include <asm/sigcontext.h>
 
 #include "trace.h"
@@ -252,10 +251,32 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
        memcpy(addr, valp, KVM_REG_SIZE(reg->id));
 
        if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
-               int i;
+               int i, nr_reg;
+
+               switch (*vcpu_cpsr(vcpu)) {
+               /*
+                * If we are dealing with user mode, only the first
+                * 15 registers (+ PC) must be narrowed to 32bit.
+                * AArch32 r0-r14 conveniently map to AArch64 x0-x14.
+                */
+               case PSR_AA32_MODE_USR:
+               case PSR_AA32_MODE_SYS:
+                       nr_reg = 15;
+                       break;
+
+               /*
+                * Otherwise, this is a privileged mode, and *all* the
+                * registers must be narrowed to 32bit.
+                */
+               default:
+                       nr_reg = 31;
+                       break;
+               }
+
+               for (i = 0; i < nr_reg; i++)
+                       vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i));
 
-               for (i = 0; i < 16; i++)
-                       *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i);
+               *vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu);
        }
 out:
        return err;
index 5d690d6..cebe39f 100644
@@ -14,7 +14,6 @@
 #include <asm/esr.h>
 #include <asm/exception.h>
 #include <asm/kvm_asm.h>
-#include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
 #include <asm/debug-monitors.h>
@@ -61,7 +60,7 @@ static int handle_smc(struct kvm_vcpu *vcpu)
         * otherwise return to the same address...
         */
        vcpu_set_reg(vcpu, 0, ~0UL);
-       kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+       kvm_incr_pc(vcpu);
        return 1;
 }
 
@@ -100,7 +99,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
                kvm_clear_request(KVM_REQ_UNHALT, vcpu);
        }
 
-       kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+       kvm_incr_pc(vcpu);
 
        return 1;
 }
@@ -221,7 +220,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu)
         * that fail their condition code check"
         */
        if (!kvm_condition_valid(vcpu)) {
-               kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+               kvm_incr_pc(vcpu);
                handled = 1;
        } else {
                exit_handle_fn exit_handler;
@@ -241,23 +240,6 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index)
 {
        struct kvm_run *run = vcpu->run;
 
-       if (ARM_SERROR_PENDING(exception_index)) {
-               u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
-
-               /*
-                * HVC/SMC already have an adjusted PC, which we need
-                * to correct in order to return to after having
-                * injected the SError.
-                */
-               if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 ||
-                   esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) {
-                       u32 adj =  kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2;
-                       *vcpu_pc(vcpu) -= adj;
-               }
-
-               return 1;
-       }
-
        exception_index = ARM_EXCEPTION_CODE(exception_index);
 
        switch (exception_index) {
index 4a81edd..687598e 100644
@@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir)                               \
                    -DDISABLE_BRANCH_PROFILING          \
                    $(DISABLE_STACKLEAK_PLUGIN)
 
-obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o smccc_wa.o
+obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o
index ae56d8a..f98cbe2 100644
@@ -123,13 +123,13 @@ static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
  * kvm_skip_instr32 - skip a trapped 32-bit instruction and proceed to the next
  * @vcpu: The vcpu pointer
  */
-void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+void kvm_skip_instr32(struct kvm_vcpu *vcpu)
 {
        u32 pc = *vcpu_pc(vcpu);
        bool is_thumb;
 
        is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT);
-       if (is_thumb && !is_wide_instr)
+       if (is_thumb && !kvm_vcpu_trap_il_is32bit(vcpu))
                pc += 2;
        else
                pc += 4;
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
new file mode 100644
index 0000000..7362909
--- /dev/null
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Fault injection for both 32 and 64bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ */
+
+#include <hyp/adjust_pc.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+
+#if !defined(__KVM_NVHE_HYPERVISOR__) && !defined(__KVM_VHE_HYPERVISOR__)
+#error Hypervisor code only!
+#endif
+
+static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
+{
+       u64 val;
+
+       if (__vcpu_read_sys_reg_from_cpu(reg, &val))
+               return val;
+
+       return __vcpu_sys_reg(vcpu, reg);
+}
+
+static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+{
+       if (__vcpu_write_sys_reg_to_cpu(val, reg))
+               return;
+
+        __vcpu_sys_reg(vcpu, reg) = val;
+}
+
+static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val)
+{
+       write_sysreg_el1(val, SYS_SPSR);
+}
+
+static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val)
+{
+       if (has_vhe())
+               write_sysreg(val, spsr_abt);
+       else
+               vcpu->arch.ctxt.spsr_abt = val;
+}
+
+static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val)
+{
+       if (has_vhe())
+               write_sysreg(val, spsr_und);
+       else
+               vcpu->arch.ctxt.spsr_und = val;
+}
+
+/*
+ * This performs the exception entry at a given EL (@target_mode), stashing PC
+ * and PSTATE into ELR and SPSR respectively, and computing the new PC/PSTATE.
+ * The EL passed to this function *must* be a non-secure, privileged mode with
+ * bit 0 being set (PSTATE.SP == 1).
+ *
+ * When an exception is taken, most PSTATE fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
+ * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
+ * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0.
+ *
+ * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429.
+ * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426.
+ *
+ * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
+                             enum exception_type type)
+{
+       unsigned long sctlr, vbar, old, new, mode;
+       u64 exc_offset;
+
+       mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+       if      (mode == target_mode)
+               exc_offset = CURRENT_EL_SP_ELx_VECTOR;
+       else if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
+               exc_offset = CURRENT_EL_SP_EL0_VECTOR;
+       else if (!(mode & PSR_MODE32_BIT))
+               exc_offset = LOWER_EL_AArch64_VECTOR;
+       else
+               exc_offset = LOWER_EL_AArch32_VECTOR;
+
+       switch (target_mode) {
+       case PSR_MODE_EL1h:
+               vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL1);
+               sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+               __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
+               break;
+       default:
+               /* Don't do that */
+               BUG();
+       }
+
+       *vcpu_pc(vcpu) = vbar + exc_offset + type;
+
+       old = *vcpu_cpsr(vcpu);
+       new = 0;
+
+       new |= (old & PSR_N_BIT);
+       new |= (old & PSR_Z_BIT);
+       new |= (old & PSR_C_BIT);
+       new |= (old & PSR_V_BIT);
+
+       // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+
+       new |= (old & PSR_DIT_BIT);
+
+       // PSTATE.UAO is set to zero upon any exception to AArch64
+       // See ARM DDI 0487E.a, page D5-2579.
+
+       // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0
+       // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented
+       // See ARM DDI 0487E.a, page D5-2578.
+       new |= (old & PSR_PAN_BIT);
+       if (!(sctlr & SCTLR_EL1_SPAN))
+               new |= PSR_PAN_BIT;
+
+       // PSTATE.SS is set to zero upon any exception to AArch64
+       // See ARM DDI 0487E.a, page D2-2452.
+
+       // PSTATE.IL is set to zero upon any exception to AArch64
+       // See ARM DDI 0487E.a, page D1-2306.
+
+       // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64
+       // See ARM DDI 0487E.a, page D13-3258
+       if (sctlr & SCTLR_ELx_DSSBS)
+               new |= PSR_SSBS_BIT;
+
+       // PSTATE.BTYPE is set to zero upon any exception to AArch64
+       // See ARM DDI 0487E.a, pages D1-2293 to D1-2294.
+
+       new |= PSR_D_BIT;
+       new |= PSR_A_BIT;
+       new |= PSR_I_BIT;
+       new |= PSR_F_BIT;
+
+       new |= target_mode;
+
+       *vcpu_cpsr(vcpu) = new;
+       __vcpu_write_spsr(vcpu, old);
+}
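
A concrete instance of the PC computation above, assuming
CURRENT_EL_SP_ELx_VECTOR and except_type_sync carry the architectural
vector-table offsets (0x200 and 0x0 respectively):

	/* synchronous exception taken from and to EL1h */
	*vcpu_pc(vcpu) = VBAR_EL1 + 0x200 + 0x0;
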
+
+/*
+ * When an exception is taken, most CPSR fields are left unchanged in the
+ * handler. However, some are explicitly overridden (e.g. M[4:0]).
+ *
+ * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with
+ * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was
+ * obsoleted by the ARMv7 virtualization extensions and is RES0.
+ *
+ * For the SPSR layout seen from AArch32, see:
+ * - ARM DDI 0406C.d, page B1-1148
+ * - ARM DDI 0487E.a, page G8-6264
+ *
+ * For the SPSR_ELx layout for AArch32 seen from AArch64, see:
+ * - ARM DDI 0487E.a, page C5-426
+ *
+ * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from
+ * MSB to LSB.
+ */
+static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode)
+{
+       u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+       unsigned long old, new;
+
+       old = *vcpu_cpsr(vcpu);
+       new = 0;
+
+       new |= (old & PSR_AA32_N_BIT);
+       new |= (old & PSR_AA32_Z_BIT);
+       new |= (old & PSR_AA32_C_BIT);
+       new |= (old & PSR_AA32_V_BIT);
+       new |= (old & PSR_AA32_Q_BIT);
+
+       // CPSR.IT[7:0] are set to zero upon any exception
+       // See ARM DDI 0487E.a, section G1.12.3
+       // See ARM DDI 0406C.d, section B1.8.3
+
+       new |= (old & PSR_AA32_DIT_BIT);
+
+       // CPSR.SSBS is set to SCTLR.DSSBS upon any exception
+       // See ARM DDI 0487E.a, page G8-6244
+       if (sctlr & BIT(31))
+               new |= PSR_AA32_SSBS_BIT;
+
+       // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0
+       // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented
+       // See ARM DDI 0487E.a, page G8-6246
+       new |= (old & PSR_AA32_PAN_BIT);
+       if (!(sctlr & BIT(23)))
+               new |= PSR_AA32_PAN_BIT;
+
+       // SS does not exist in AArch32, so ignore
+
+       // CPSR.IL is set to zero upon any exception
+       // See ARM DDI 0487E.a, page G1-5527
+
+       new |= (old & PSR_AA32_GE_MASK);
+
+       // CPSR.IT[7:0] are set to zero upon any exception
+       // See prior comment above
+
+       // CPSR.E is set to SCTLR.EE upon any exception
+       // See ARM DDI 0487E.a, page G8-6245
+       // See ARM DDI 0406C.d, page B4-1701
+       if (sctlr & BIT(25))
+               new |= PSR_AA32_E_BIT;
+
+       // CPSR.A is unchanged upon an exception to Undefined, Supervisor
+       // CPSR.A is set upon an exception to other modes
+       // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+       // See ARM DDI 0406C.d, page B1-1182
+       new |= (old & PSR_AA32_A_BIT);
+       if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC)
+               new |= PSR_AA32_A_BIT;
+
+       // CPSR.I is set upon any exception
+       // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+       // See ARM DDI 0406C.d, page B1-1182
+       new |= PSR_AA32_I_BIT;
+
+       // CPSR.F is set upon an exception to FIQ
+       // CPSR.F is unchanged upon an exception to other modes
+       // See ARM DDI 0487E.a, pages G1-5515 to G1-5516
+       // See ARM DDI 0406C.d, page B1-1182
+       new |= (old & PSR_AA32_F_BIT);
+       if (mode == PSR_AA32_MODE_FIQ)
+               new |= PSR_AA32_F_BIT;
+
+       // CPSR.T is set to SCTLR.TE upon any exception
+       // See ARM DDI 0487E.a, page G8-5514
+       // See ARM DDI 0406C.d, page B1-1181
+       if (sctlr & BIT(30))
+               new |= PSR_AA32_T_BIT;
+
+       new |= mode;
+
+       return new;
+}
+
+/*
+ * Table taken from ARMv8 ARM DDI0487B-B, table G1-10.
+ */
+static const u8 return_offsets[8][2] = {
+       [0] = { 0, 0 },         /* Reset, unused */
+       [1] = { 4, 2 },         /* Undefined */
+       [2] = { 0, 0 },         /* SVC, unused */
+       [3] = { 4, 4 },         /* Prefetch abort */
+       [4] = { 8, 8 },         /* Data abort */
+       [5] = { 0, 0 },         /* HVC, unused */
+       [6] = { 4, 4 },         /* IRQ, unused */
+       [7] = { 4, 4 },         /* FIQ, unused */
+};
+
+static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
+{
+       unsigned long spsr = *vcpu_cpsr(vcpu);
+       bool is_thumb = (spsr & PSR_AA32_T_BIT);
+       u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+       u32 return_address;
+
+       *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode);
+       return_address   = *vcpu_pc(vcpu);
+       return_address  += return_offsets[vect_offset >> 2][is_thumb];
+
+       /* KVM only enters the ABT and UND modes, so only deal with those */
+       switch (mode) {
+       case PSR_AA32_MODE_ABT:
+               __vcpu_write_spsr_abt(vcpu, host_spsr_to_spsr32(spsr));
+               vcpu_gp_regs(vcpu)->compat_lr_abt = return_address;
+               break;
+
+       case PSR_AA32_MODE_UND:
+               __vcpu_write_spsr_und(vcpu, host_spsr_to_spsr32(spsr));
+               vcpu_gp_regs(vcpu)->compat_lr_und = return_address;
+               break;
+       }
+
+       /* Branch to exception vector */
+       if (sctlr & (1 << 13))
+               vect_offset += 0xffff0000;
+       else /* always have security exceptions */
+               vect_offset += __vcpu_read_sys_reg(vcpu, VBAR_EL1);
+
+       *vcpu_pc(vcpu) = vect_offset;
+}
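
A worked instance of the 32-bit path above: injecting an Undefined
exception (vect_offset = 4) while the guest is in Thumb state gives

	/*
	 * return_offsets[4 >> 2][is_thumb] = return_offsets[1][1] = 2
	 *   -> compat_lr_und = PC + 2
	 * vector = VBAR + 4, or 0xffff0004 when SCTLR.V (hivecs) is set
	 */
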
+
+void kvm_inject_exception(struct kvm_vcpu *vcpu)
+{
+       if (vcpu_el1_is_32bit(vcpu)) {
+               switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
+               case KVM_ARM64_EXCEPT_AA32_UND:
+                       enter_exception32(vcpu, PSR_AA32_MODE_UND, 4);
+                       break;
+               case KVM_ARM64_EXCEPT_AA32_IABT:
+                       enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12);
+                       break;
+               case KVM_ARM64_EXCEPT_AA32_DABT:
+                       enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16);
+                       break;
+               default:
+                       /* Err... */
+                       break;
+               }
+       } else {
+               switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
+               case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
+                     KVM_ARM64_EXCEPT_AA64_EL1):
+                       enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
+                       break;
+               default:
+                       /*
+                        * Only EL1_SYNC makes sense so far; EL2_{SYNC,IRQ}
+                        * will be implemented at some point. Everything
+                        * else gets silently ignored.
+                        */
+                       break;
+               }
+       }
+}
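
For reference, the CPSR construction performed by get_except32_cpsr() boils down to a handful of bit moves. The following stand-alone sketch mirrors the logic above, with hypothetical masks standing in for the kernel's PSR_AA32_* constants and the SS/IL/IT/DIT handling elided:

    #include <stdint.h>

    /* Hypothetical stand-ins for the kernel's PSR_AA32_* masks. */
    #define AA32_GE_MASK    (0xfu << 16)
    #define AA32_E          (1u << 9)
    #define AA32_A          (1u << 8)
    #define AA32_I          (1u << 7)
    #define AA32_F          (1u << 6)
    #define AA32_T          (1u << 5)
    #define MODE_FIQ        0x11u
    #define MODE_SVC        0x13u
    #define MODE_UND        0x1bu

    static uint32_t except32_cpsr(uint32_t old, uint32_t sctlr, uint32_t mode)
    {
        uint32_t new = old & AA32_GE_MASK;  /* GE bits are preserved */

        if (sctlr & (1u << 25))             /* CPSR.E <- SCTLR.EE */
            new |= AA32_E;
        new |= old & AA32_A;                /* A unchanged for UND/SVC, */
        if (mode != MODE_UND && mode != MODE_SVC)
            new |= AA32_A;                  /* set for all other modes */
        new |= AA32_I;                      /* IRQs masked on any entry */
        new |= old & AA32_F;                /* F only set on FIQ entry */
        if (mode == MODE_FIQ)
            new |= AA32_F;
        if (sctlr & (1u << 30))             /* CPSR.T <- SCTLR.TE */
            new |= AA32_T;

        return new | mode;
    }
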
index 0a5b36e..d179056 100644
@@ -13,6 +13,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/mmu.h>
+#include <asm/spectre.h>
 
 .macro save_caller_saved_regs_vect
        /* x0 and x1 were saved in the vector entry */
@@ -187,52 +188,60 @@ SYM_CODE_START(__kvm_hyp_vector)
        valid_vect      el1_error               // Error 32-bit EL1
 SYM_CODE_END(__kvm_hyp_vector)
 
-.macro hyp_ventry
-       .align 7
+.macro spectrev2_smccc_wa1_smc
+       sub     sp, sp, #(8 * 4)
+       stp     x2, x3, [sp, #(8 * 0)]
+       stp     x0, x1, [sp, #(8 * 2)]
+       mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+       smc     #0
+       ldp     x2, x3, [sp, #(8 * 0)]
+       add     sp, sp, #(8 * 2)
+.endm
+
+.macro hyp_ventry      indirect, spectrev2
+       .align  7
 1:     esb
-       .rept 26
-       nop
-       .endr
-/*
- * The default sequence is to directly branch to the KVM vectors,
- * using the computed offset. This applies for VHE as well as
- * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble.
- *
- * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
- * with:
- *
- * stp x0, x1, [sp, #-16]!
- * movz        x0, #(addr & 0xffff)
- * movk        x0, #((addr >> 16) & 0xffff), lsl #16
- * movk        x0, #((addr >> 32) & 0xffff), lsl #32
- * br  x0
- *
- * Where:
- * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE.
- * See kvm_patch_vector_branch for details.
- */
-alternative_cb kvm_patch_vector_branch
+       .if \spectrev2 != 0
+       spectrev2_smccc_wa1_smc
+       .else
        stp     x0, x1, [sp, #-16]!
-       b       __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE)
+       .endif
+       .if \indirect != 0
+       alternative_cb  kvm_patch_vector_branch
+       /*
+        * For ARM64_SPECTRE_V3A configurations, these NOPs get replaced with:
+        *
+        * movz x0, #(addr & 0xffff)
+        * movk x0, #((addr >> 16) & 0xffff), lsl #16
+        * movk x0, #((addr >> 32) & 0xffff), lsl #32
+        * br   x0
+        *
+        * Where:
+        * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE.
+        * See kvm_patch_vector_branch for details.
+        */
        nop
        nop
        nop
-alternative_cb_end
+       nop
+       alternative_cb_end
+       .endif
+       b       __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE)
 .endm
 
-.macro generate_vectors
+.macro generate_vectors        indirect, spectrev2
 0:
        .rept 16
-       hyp_ventry
+       hyp_ventry      \indirect, \spectrev2
        .endr
        .org 0b + SZ_2K         // Safety measure
 .endm
 
        .align  11
 SYM_CODE_START(__bp_harden_hyp_vecs)
-       .rept BP_HARDEN_EL2_SLOTS
-       generate_vectors
-       .endr
+       generate_vectors indirect = 0, spectrev2 = 1 // HYP_VECTOR_SPECTRE_DIRECT
+       generate_vectors indirect = 1, spectrev2 = 0 // HYP_VECTOR_INDIRECT
+       generate_vectors indirect = 1, spectrev2 = 1 // HYP_VECTOR_SPECTRE_INDIRECT
 1:     .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ
        .org 1b
 SYM_CODE_END(__bp_harden_hyp_vecs)
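
Each generate_vectors invocation emits one 2KiB page of 16 vectors, so __bp_harden_hyp_vecs now carries exactly three purpose-built slots instead of BP_HARDEN_EL2_SLOTS identical copies. The slot comments correspond to the vector flavours enumerated in <asm/spectre.h> earlier in this series, roughly:

    enum arm64_hyp_spectre_vector {
        HYP_VECTOR_DIRECT,              /* plain __kvm_hyp_vector, no slot */
        HYP_VECTOR_SPECTRE_DIRECT,      /* slot 0: WA1 SMC, then direct branch */
        HYP_VECTOR_INDIRECT,            /* slot 1: patched indirect branch */
        HYP_VECTOR_SPECTRE_INDIRECT,    /* slot 2: WA1 SMC + indirect branch */
    };
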
diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
new file mode 100644
index 0000000..b1f6092
--- /dev/null
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Guest PC manipulation helpers
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Copyright (C) 2020 - Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#ifndef __ARM64_KVM_HYP_ADJUST_PC_H__
+#define __ARM64_KVM_HYP_ADJUST_PC_H__
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_host.h>
+
+void kvm_inject_exception(struct kvm_vcpu *vcpu);
+
+static inline void kvm_skip_instr(struct kvm_vcpu *vcpu)
+{
+       if (vcpu_mode_is_32bit(vcpu)) {
+               kvm_skip_instr32(vcpu);
+       } else {
+               *vcpu_pc(vcpu) += 4;
+               *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK;
+       }
+
+       /* advance the singlestep state machine */
+       *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+}
+
+/*
+ * Skip an instruction which has been emulated at hyp while most guest sysregs
+ * are live.
+ */
+static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu)
+{
+       *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+       vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR);
+
+       kvm_skip_instr(vcpu);
+
+       write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR);
+       write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+}
+
+/*
+ * Adjust the guest PC on entry, depending on flags provided by EL1
+ * for the purpose of emulation (MMIO, sysreg) or exception injection.
+ */
+static inline void __adjust_pc(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
+               kvm_inject_exception(vcpu);
+               vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+                                     KVM_ARM64_EXCEPT_MASK);
+       } else  if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
+               kvm_skip_instr(vcpu);
+               vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
+       }
+}
+
+#endif
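
On the kernel side, asking for the PC to move on is now just a flag that __adjust_pc() consumes on the next guest entry. The kvm_incr_pc() helper used by the MMIO and abort paths later in this merge is essentially (sketch; the real definition lives in <asm/kvm_emulate.h>):

    static inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
    {
        vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
    }
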
index 1f875a8..8447357 100644
@@ -7,6 +7,8 @@
 #ifndef __ARM64_KVM_HYP_SWITCH_H__
 #define __ARM64_KVM_HYP_SWITCH_H__
 
+#include <hyp/adjust_pc.h>
+
 #include <linux/arm-smccc.h>
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -409,6 +411,21 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
        if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
                vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
 
+       if (ARM_SERROR_PENDING(*exit_code)) {
+               u8 esr_ec = kvm_vcpu_trap_get_class(vcpu);
+
+               /*
+                * An HVC already has an adjusted PC, which we need to
+                * wind back in order to return to the right place after
+                * the SError has been injected.
+                *
+                * SMC, on the other hand, is *trapped*, meaning its
+                * preferred return address is the SMC itself.
+                */
+               if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64)
+                       write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR);
+       }
+
        /*
         * We're using the raw exception code in order to only process
         * the trap if no SError is pending. We will come back to the
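
Restated as a helper, the fixup above picks the preferred return address according to the class of the original exit; an illustrative sketch only, assuming the ESR_ELx_EC_* encodings from <asm/esr.h>:

    static unsigned long serror_return_pc(unsigned long elr, u8 esr_ec)
    {
        switch (esr_ec) {
        case ESR_ELx_EC_HVC32:
        case ESR_ELx_EC_HVC64:
            return elr - 4; /* HVC: ELR has already stepped past it */
        default:
            return elr;     /* SMC is trapped: ELR is the SMC itself */
        }
    }
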
diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
new file mode 100644
index 0000000..1e6d995
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trap handler helpers.
+ *
+ * Copyright (C) 2020 - Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#ifndef __ARM64_KVM_NVHE_TRAP_HANDLER_H__
+#define __ARM64_KVM_NVHE_TRAP_HANDLER_H__
+
+#include <asm/kvm_host.h>
+
+#define cpu_reg(ctxt, r)       (ctxt)->regs.regs[r]
+#define DECLARE_REG(type, name, ctxt, reg)     \
+                               type name = (type)cpu_reg(ctxt, (reg))
+
+#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
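
DECLARE_REG() keeps the hypercall handlers below declarative; for instance, DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1) expands to nothing more than:

    struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)(host_ctxt)->regs.regs[1];
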
index ddde15f..1f1e351 100644
@@ -6,9 +6,10 @@
 asflags-y := -D__KVM_NVHE_HYPERVISOR__
 ccflags-y := -D__KVM_NVHE_HYPERVISOR__
 
-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o
+obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
+        hyp-main.o hyp-smp.o psci-relay.o
 obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
-        ../fpsimd.o ../hyp-entry.o
+        ../fpsimd.o ../hyp-entry.o ../exception.o
 
 ##
 ## Build rules for compiling nVHE hyp code
index ed27f06..a820dfd 100644
@@ -13,8 +13,6 @@
        .text
 
 SYM_FUNC_START(__host_exit)
-       stp     x0, x1, [sp, #-16]!
-
        get_host_ctxt   x0, x1
 
        /* Store the host regs x2 and x3 */
@@ -41,6 +39,7 @@ SYM_FUNC_START(__host_exit)
        bl      handle_trap
 
        /* Restore host regs x0-x17 */
+__host_enter_restore_full:
        ldp     x0, x1,   [x29, #CPU_XREG_OFFSET(0)]
        ldp     x2, x3,   [x29, #CPU_XREG_OFFSET(2)]
        ldp     x4, x5,   [x29, #CPU_XREG_OFFSET(4)]
@@ -64,6 +63,14 @@ __host_enter_without_restoring:
 SYM_FUNC_END(__host_exit)
 
 /*
+ * void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
+ */
+SYM_FUNC_START(__host_enter)
+       mov     x29, x0
+       b       __host_enter_restore_full
+SYM_FUNC_END(__host_enter)
+
+/*
  * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
  */
 SYM_FUNC_START(__hyp_do_panic)
@@ -99,13 +106,15 @@ SYM_FUNC_END(__hyp_do_panic)
        mrs     x0, esr_el2
        lsr     x0, x0, #ESR_ELx_EC_SHIFT
        cmp     x0, #ESR_ELx_EC_HVC64
-       ldp     x0, x1, [sp], #16
        b.ne    __host_exit
 
+       ldp     x0, x1, [sp]            // Don't fixup the stack yet
+
        /* Check for a stub HVC call */
        cmp     x0, #HVC_STUB_HCALL_NR
        b.hs    __host_exit
 
+       add     sp, sp, #16
        /*
         * Compute the idmap address of __kvm_handle_stub_hvc and
         * jump there. Since we use kimage_voffset, do not use the
@@ -115,10 +124,7 @@ SYM_FUNC_END(__hyp_do_panic)
         * Preserve x0-x4, which may contain stub parameters.
         */
        ldr     x5, =__kvm_handle_stub_hvc
-       ldr_l   x6, kimage_voffset
-
-       /* x5 = __pa(x5) */
-       sub     x5, x5, x6
+       kimg_pa x5, x6
        br      x5
 .L__vect_end\@:
 .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
@@ -183,3 +189,41 @@ SYM_CODE_START(__kvm_hyp_host_vector)
        invalid_host_el1_vect                   // FIQ 32-bit EL1
        invalid_host_el1_vect                   // Error 32-bit EL1
 SYM_CODE_END(__kvm_hyp_host_vector)
+
+/*
+ * Forward SMC with arguments in struct kvm_cpu_context, and
+ * store the result into the same struct. Assumes SMCCC 1.2 or older.
+ *
+ * x0: struct kvm_cpu_context*
+ */
+SYM_CODE_START(__kvm_hyp_host_forward_smc)
+       /*
+        * Use x18 to keep the pointer to the host context because
+        * x18 is callee-saved in SMCCC but not in AAPCS64.
+        */
+       mov     x18, x0
+
+       ldp     x0, x1,   [x18, #CPU_XREG_OFFSET(0)]
+       ldp     x2, x3,   [x18, #CPU_XREG_OFFSET(2)]
+       ldp     x4, x5,   [x18, #CPU_XREG_OFFSET(4)]
+       ldp     x6, x7,   [x18, #CPU_XREG_OFFSET(6)]
+       ldp     x8, x9,   [x18, #CPU_XREG_OFFSET(8)]
+       ldp     x10, x11, [x18, #CPU_XREG_OFFSET(10)]
+       ldp     x12, x13, [x18, #CPU_XREG_OFFSET(12)]
+       ldp     x14, x15, [x18, #CPU_XREG_OFFSET(14)]
+       ldp     x16, x17, [x18, #CPU_XREG_OFFSET(16)]
+
+       smc     #0
+
+       stp     x0, x1,   [x18, #CPU_XREG_OFFSET(0)]
+       stp     x2, x3,   [x18, #CPU_XREG_OFFSET(2)]
+       stp     x4, x5,   [x18, #CPU_XREG_OFFSET(4)]
+       stp     x6, x7,   [x18, #CPU_XREG_OFFSET(6)]
+       stp     x8, x9,   [x18, #CPU_XREG_OFFSET(8)]
+       stp     x10, x11, [x18, #CPU_XREG_OFFSET(10)]
+       stp     x12, x13, [x18, #CPU_XREG_OFFSET(12)]
+       stp     x14, x15, [x18, #CPU_XREG_OFFSET(14)]
+       stp     x16, x17, [x18, #CPU_XREG_OFFSET(16)]
+
+       ret
+SYM_CODE_END(__kvm_hyp_host_forward_smc)
index b11a9d7..31b060a 100644
@@ -9,6 +9,7 @@
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/el2_setup.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
@@ -47,10 +48,7 @@ __invalid:
 
        /*
         * x0: SMCCC function ID
-        * x1: HYP pgd
-        * x2: per-CPU offset
-        * x3: HYP stack
-        * x4: HYP vectors
+        * x1: struct kvm_nvhe_init_params PA
         */
 __do_hyp_init:
        /* Check for a stub HVC call */
@@ -71,48 +69,53 @@ __do_hyp_init:
        mov     x0, #SMCCC_RET_NOT_SUPPORTED
        eret
 
-1:
-       /* Set tpidr_el2 for use by HYP to free a register */
-       msr     tpidr_el2, x2
+1:     mov     x0, x1
+       mov     x4, lr
+       bl      ___kvm_hyp_init
+       mov     lr, x4
 
-       phys_to_ttbr x0, x1
-alternative_if ARM64_HAS_CNP
-       orr     x0, x0, #TTBR_CNP_BIT
+       /* Hello, World! */
+       mov     x0, #SMCCC_RET_SUCCESS
+       eret
+SYM_CODE_END(__kvm_hyp_init)
+
+/*
+ * Initialize the hypervisor in EL2.
+ *
+ * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers
+ * and leave x4 for the caller.
+ *
+ * x0: struct kvm_nvhe_init_params PA
+ */
+SYM_CODE_START_LOCAL(___kvm_hyp_init)
+alternative_if ARM64_KVM_PROTECTED_MODE
+       mov_q   x1, HCR_HOST_NVHE_PROTECTED_FLAGS
+       msr     hcr_el2, x1
 alternative_else_nop_endif
-       msr     ttbr0_el2, x0
 
-       mrs     x0, tcr_el1
-       mov_q   x1, TCR_EL2_MASK
-       and     x0, x0, x1
-       mov     x1, #TCR_EL2_RES1
-       orr     x0, x0, x1
+       ldr     x1, [x0, #NVHE_INIT_TPIDR_EL2]
+       msr     tpidr_el2, x1
 
-       /*
-        * The ID map may be configured to use an extended virtual address
-        * range. This is only the case if system RAM is out of range for the
-        * currently configured page size and VA_BITS, in which case we will
-        * also need the extended virtual range for the HYP ID map, or we won't
-        * be able to enable the EL2 MMU.
-        *
-        * However, at EL2, there is only one TTBR register, and we can't switch
-        * between translation tables *and* update TCR_EL2.T0SZ at the same
-        * time. Bottom line: we need to use the extended range with *both* our
-        * translation tables.
-        *
-        * So use the same T0SZ value we use for the ID map.
-        */
-       ldr_l   x1, idmap_t0sz
-       bfi     x0, x1, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+       ldr     x1, [x0, #NVHE_INIT_STACK_HYP_VA]
+       mov     sp, x1
+
+       ldr     x1, [x0, #NVHE_INIT_MAIR_EL2]
+       msr     mair_el2, x1
+
+       ldr     x1, [x0, #NVHE_INIT_PGD_PA]
+       phys_to_ttbr x2, x1
+alternative_if ARM64_HAS_CNP
+       orr     x2, x2, #TTBR_CNP_BIT
+alternative_else_nop_endif
+       msr     ttbr0_el2, x2
 
        /*
         * Set the PS bits in TCR_EL2.
         */
-       tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
+       ldr     x1, [x0, #NVHE_INIT_TCR_EL2]
+       tcr_compute_pa_size x1, #TCR_EL2_PS_SHIFT, x2, x3
+       msr     tcr_el2, x1
 
-       msr     tcr_el2, x0
-
-       mrs     x0, mair_el1
-       msr     mair_el2, x0
        isb
 
        /* Invalidate the stale TLBs from Bootloader */
@@ -134,14 +137,70 @@ alternative_else_nop_endif
        msr     sctlr_el2, x0
        isb
 
-       /* Set the stack and new vectors */
-       mov     sp, x3
-       msr     vbar_el2, x4
+       /* Set the host vector */
+       ldr     x0, =__kvm_hyp_host_vector
+       kimg_hyp_va x0, x1
+       msr     vbar_el2, x0
 
-       /* Hello, World! */
-       mov     x0, #SMCCC_RET_SUCCESS
-       eret
-SYM_CODE_END(__kvm_hyp_init)
+       ret
+SYM_CODE_END(___kvm_hyp_init)
+
+/*
+ * PSCI CPU_ON entry point
+ *
+ * x0: struct kvm_nvhe_init_params PA
+ */
+SYM_CODE_START(kvm_hyp_cpu_entry)
+       mov     x1, #1                          // is_cpu_on = true
+       b       __kvm_hyp_init_cpu
+SYM_CODE_END(kvm_hyp_cpu_entry)
+
+/*
+ * PSCI CPU_SUSPEND / SYSTEM_SUSPEND entry point
+ *
+ * x0: struct kvm_nvhe_init_params PA
+ */
+SYM_CODE_START(kvm_hyp_cpu_resume)
+       mov     x1, #0                          // is_cpu_on = false
+       b       __kvm_hyp_init_cpu
+SYM_CODE_END(kvm_hyp_cpu_resume)
+
+/*
+ * Common code for CPU entry points. Initializes EL2 state and
+ * installs the hypervisor before handing over to a C handler.
+ *
+ * x0: struct kvm_nvhe_init_params PA
+ * x1: bool is_cpu_on
+ */
+SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
+       mov     x28, x0                         // Stash arguments
+       mov     x29, x1
+
+       /* Check that the core was booted in EL2. */
+       mrs     x0, CurrentEL
+       cmp     x0, #CurrentEL_EL2
+       b.eq    2f
+
+       /* The core booted in EL1. KVM cannot be initialized on it. */
+1:     wfe
+       wfi
+       b       1b
+
+2:     msr     SPsel, #1                       // We want to use SP_EL{1,2}
+
+       /* Initialize EL2 CPU state to sane values. */
+       init_el2_state nvhe                     // Clobbers x0..x2
+
+       /* Enable MMU, set vectors and stack. */
+       mov     x0, x28
+       bl      ___kvm_hyp_init                 // Clobbers x0..x3
+
+       /* Leave idmap. */
+       mov     x0, x29
+       ldr     x1, =kvm_host_psci_cpu_entry
+       kimg_hyp_va x1, x2
+       br      x1
+SYM_CODE_END(__kvm_hyp_init_cpu)
 
 SYM_CODE_START(__kvm_handle_stub_hvc)
        cmp     x0, #HVC_SOFT_RESTART
@@ -176,6 +235,11 @@ reset:
        msr     sctlr_el2, x5
        isb
 
+alternative_if ARM64_KVM_PROTECTED_MODE
+       mov_q   x5, HCR_HOST_NVHE_FLAGS
+       msr     hcr_el2, x5
+alternative_else_nop_endif
+
        /* Install stub vectors */
        adr_l   x5, __hyp_stub_vectors
        msr     vbar_el2, x5
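
The NVHE_INIT_* offsets consumed by ___kvm_hyp_init come from a per-CPU parameter block that the host fills in before issuing the init hypercall. A sketch of its fields at this point in the series (the authoritative layout is in <asm/kvm_asm.h> together with the asm-offsets machinery):

    struct kvm_nvhe_init_params {
        unsigned long mair_el2;
        unsigned long tcr_el2;
        unsigned long tpidr_el2;
        unsigned long stack_hyp_va;
        phys_addr_t pgd_pa;
    };
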
index e2eafe2..bde658d 100644
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
 
-#include <kvm/arm_hypercalls.h>
+#include <nvhe/trap_handler.h>
 
-static void handle_host_hcall(unsigned long func_id,
-                             struct kvm_cpu_context *host_ctxt)
+DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
+
+void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
+
+static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
 {
-       unsigned long ret = 0;
+       DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
 
-       switch (func_id) {
-       case KVM_HOST_SMCCC_FUNC(__kvm_vcpu_run): {
-               unsigned long r1 = host_ctxt->regs.regs[1];
-               struct kvm_vcpu *vcpu = (struct kvm_vcpu *)r1;
+       cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu));
+}
 
-               ret = __kvm_vcpu_run(kern_hyp_va(vcpu));
-               break;
-       }
-       case KVM_HOST_SMCCC_FUNC(__kvm_flush_vm_context):
-               __kvm_flush_vm_context();
-               break;
-       case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid_ipa): {
-               unsigned long r1 = host_ctxt->regs.regs[1];
-               struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1;
-               phys_addr_t ipa = host_ctxt->regs.regs[2];
-               int level = host_ctxt->regs.regs[3];
+static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt)
+{
+       __kvm_flush_vm_context();
+}
 
-               __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
-               break;
-       }
-       case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid): {
-               unsigned long r1 = host_ctxt->regs.regs[1];
-               struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1;
+static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+       DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
+       DECLARE_REG(int, level, host_ctxt, 3);
 
-               __kvm_tlb_flush_vmid(kern_hyp_va(mmu));
-               break;
-       }
-       case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_local_vmid): {
-               unsigned long r1 = host_ctxt->regs.regs[1];
-               struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1;
+       __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
+}
 
-               __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu));
-               break;
-       }
-       case KVM_HOST_SMCCC_FUNC(__kvm_timer_set_cntvoff): {
-               u64 cntvoff = host_ctxt->regs.regs[1];
+static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
 
-               __kvm_timer_set_cntvoff(cntvoff);
-               break;
-       }
-       case KVM_HOST_SMCCC_FUNC(__kvm_enable_ssbs):
-               __kvm_enable_ssbs();
-               break;
-       case KVM_HOST_SMCCC_FUNC(__vgic_v3_get_ich_vtr_el2):
-               ret = __vgic_v3_get_ich_vtr_el2();
-               break;
-       case KVM_HOST_SMCCC_FUNC(__vgic_v3_read_vmcr):
-               ret = __vgic_v3_read_vmcr();
-               break;
-       case KVM_HOST_SMCCC_FUNC(__vgic_v3_write_vmcr): {
-               u32 vmcr = host_ctxt->regs.regs[1];
+       __kvm_tlb_flush_vmid(kern_hyp_va(mmu));
+}
 
-               __vgic_v3_write_vmcr(vmcr);
-               break;
-       }
-       case KVM_HOST_SMCCC_FUNC(__vgic_v3_init_lrs):
-               __vgic_v3_init_lrs();
-               break;
-       case KVM_HOST_SMCCC_FUNC(__kvm_get_mdcr_el2):
-               ret = __kvm_get_mdcr_el2();
-               break;
-       case KVM_HOST_SMCCC_FUNC(__vgic_v3_save_aprs): {
-               unsigned long r1 = host_ctxt->regs.regs[1];
-               struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1;
+static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
 
-               __vgic_v3_save_aprs(kern_hyp_va(cpu_if));
-               break;
-       }
-       case KVM_HOST_SMCCC_FUNC(__vgic_v3_restore_aprs): {
-               unsigned long r1 = host_ctxt->regs.regs[1];
-               struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1;
+       __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu));
+}
 
-               __vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
-               break;
-       }
-       default:
-               /* Invalid host HVC. */
-               host_ctxt->regs.regs[0] = SMCCC_RET_NOT_SUPPORTED;
-               return;
-       }
+static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt)
+{
+       __kvm_timer_set_cntvoff(cpu_reg(host_ctxt, 1));
+}
+
+static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt)
+{
+       u64 tmp;
 
-       host_ctxt->regs.regs[0] = SMCCC_RET_SUCCESS;
-       host_ctxt->regs.regs[1] = ret;
+       tmp = read_sysreg_el2(SYS_SCTLR);
+       tmp |= SCTLR_ELx_DSSBS;
+       write_sysreg_el2(tmp, SYS_SCTLR);
+}
+
+static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt)
+{
+       cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2();
+}
+
+static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
+{
+       cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr();
+}
+
+static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt)
+{
+       __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1));
+}
+
+static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
+{
+       __vgic_v3_init_lrs();
+}
+
+static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt)
+{
+       cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2();
+}
+
+static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
+
+       __vgic_v3_save_aprs(kern_hyp_va(cpu_if));
+}
+
+static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
+
+       __vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
+}
+
+typedef void (*hcall_t)(struct kvm_cpu_context *);
+
+#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x)
+
+static const hcall_t *host_hcall[] = {
+       HANDLE_FUNC(__kvm_vcpu_run),
+       HANDLE_FUNC(__kvm_flush_vm_context),
+       HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
+       HANDLE_FUNC(__kvm_tlb_flush_vmid),
+       HANDLE_FUNC(__kvm_tlb_flush_local_vmid),
+       HANDLE_FUNC(__kvm_timer_set_cntvoff),
+       HANDLE_FUNC(__kvm_enable_ssbs),
+       HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2),
+       HANDLE_FUNC(__vgic_v3_read_vmcr),
+       HANDLE_FUNC(__vgic_v3_write_vmcr),
+       HANDLE_FUNC(__vgic_v3_init_lrs),
+       HANDLE_FUNC(__kvm_get_mdcr_el2),
+       HANDLE_FUNC(__vgic_v3_save_aprs),
+       HANDLE_FUNC(__vgic_v3_restore_aprs),
+};
+
+static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(unsigned long, id, host_ctxt, 0);
+       const hcall_t *kfn;
+       hcall_t hfn;
+
+       id -= KVM_HOST_SMCCC_ID(0);
+
+       if (unlikely(id >= ARRAY_SIZE(host_hcall)))
+               goto inval;
+
+       kfn = host_hcall[id];
+       if (unlikely(!kfn))
+               goto inval;
+
+       cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
+
+       hfn = kimg_fn_hyp_va(kfn);
+       hfn(host_ctxt);
+
+       return;
+inval:
+       cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
+}
+
+static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt)
+{
+       __kvm_hyp_host_forward_smc(host_ctxt);
+}
+
+static void skip_host_instruction(void)
+{
+       write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR);
+}
+
+static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
+{
+       bool handled;
+
+       handled = kvm_host_psci_handler(host_ctxt);
+       if (!handled)
+               default_host_smc_handler(host_ctxt);
+
+       /*
+        * Unlike HVC, the return address of an SMC is the instruction's PC.
+        * Move the return address past the instruction.
+        */
+       skip_host_instruction();
 }
 
 void handle_trap(struct kvm_cpu_context *host_ctxt)
 {
        u64 esr = read_sysreg_el2(SYS_ESR);
-       unsigned long func_id;
 
-       if (ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64)
+       switch (ESR_ELx_EC(esr)) {
+       case ESR_ELx_EC_HVC64:
+               handle_host_hcall(host_ctxt);
+               break;
+       case ESR_ELx_EC_SMC64:
+               handle_host_smc(host_ctxt);
+               break;
+       default:
                hyp_panic();
-
-       func_id = host_ctxt->regs.regs[0];
-       handle_host_hcall(func_id, host_ctxt);
+       }
 }
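
With the jump table in place, extending the host interface means writing one handler and adding one table entry rather than growing a switch statement. A hypothetical addition, purely for illustration:

    /* Hypothetical hypercall; the name and ID do not exist in the tree. */
    static void handle___my_new_call(struct kvm_cpu_context *host_ctxt)
    {
        DECLARE_REG(u64, arg, host_ctxt, 1);

        cpu_reg(host_ctxt, 1) = arg + 1;    /* stand-in for real work */
    }

plus HANDLE_FUNC(__my_new_call) in host_hcall[] and a matching __KVM_HOST_SMCCC_FUNC___my_new_call ID in the kvm_asm.h enumeration.
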
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
new file mode 100644
index 0000000..cbab0c6
--- /dev/null
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: David Brazdil <dbrazdil@google.com>
+ */
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * nVHE copy of data structures tracking available CPU cores.
+ * Only entries for CPUs that were online at KVM init are populated.
+ * Other CPUs should not be allowed to boot because their features were
+ * not checked against the finalized system capabilities.
+ */
+u64 __ro_after_init __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
+
+u64 cpu_logical_map(unsigned int cpu)
+{
+       if (cpu >= ARRAY_SIZE(__cpu_logical_map))
+               hyp_panic();
+
+       return __cpu_logical_map[cpu];
+}
+
+unsigned long __hyp_per_cpu_offset(unsigned int cpu)
+{
+       unsigned long *cpu_base_array;
+       unsigned long this_cpu_base;
+       unsigned long elf_base;
+
+       if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base))
+               hyp_panic();
+
+       cpu_base_array = (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base);
+       this_cpu_base = kern_hyp_va(cpu_base_array[cpu]);
+       elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start);
+       return this_cpu_base - elf_base;
+}
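
Once the offset computed here has been installed in tpidr_el2 during CPU init, a hyp-side per-CPU access needs no lookup at runtime; it follows the __hyp_this_cpu_ptr() pattern, roughly (name and details illustrative):

    #define hyp_this_cpu_ptr(sym)                                       \
        ({                                                              \
            unsigned long __p = (unsigned long)hyp_symbol_addr(sym);    \
            __p += read_sysreg(tpidr_el2);                              \
            (typeof(sym) *)__p;                                         \
        })
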
index a797aba..1206d0d 100644
@@ -21,4 +21,5 @@ SECTIONS {
        HYP_SECTION_NAME(.data..percpu) : {
                PERCPU_INPUT(L1_CACHE_BYTES)
        }
+       HYP_SECTION(.data..ro_after_init)
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
new file mode 100644
index 0000000..08dc9de
--- /dev/null
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: David Brazdil <dbrazdil@google.com>
+ */
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <kvm/arm_hypercalls.h>
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/psci.h>
+#include <kvm/arm_psci.h>
+#include <uapi/linux/psci.h>
+
+#include <nvhe/trap_handler.h>
+
+void kvm_hyp_cpu_entry(unsigned long r0);
+void kvm_hyp_cpu_resume(unsigned long r0);
+
+void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
+
+/* Config options set by the host. */
+__ro_after_init u32 kvm_host_psci_version;
+__ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids;
+__ro_after_init s64 hyp_physvirt_offset;
+
+#define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset)
+
+#define INVALID_CPU_ID UINT_MAX
+
+struct psci_boot_args {
+       atomic_t lock;
+       unsigned long pc;
+       unsigned long r0;
+};
+
+#define PSCI_BOOT_ARGS_UNLOCKED                0
+#define PSCI_BOOT_ARGS_LOCKED          1
+
+#define PSCI_BOOT_ARGS_INIT                                    \
+       ((struct psci_boot_args){                               \
+               .lock = ATOMIC_INIT(PSCI_BOOT_ARGS_UNLOCKED),   \
+       })
+
+static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT;
+static DEFINE_PER_CPU(struct psci_boot_args, suspend_args) = PSCI_BOOT_ARGS_INIT;
+
+static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(u64, func_id, host_ctxt, 0);
+
+       return func_id;
+}
+
+static bool is_psci_0_1_call(u64 func_id)
+{
+       return (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) ||
+              (func_id == kvm_host_psci_0_1_function_ids.cpu_on) ||
+              (func_id == kvm_host_psci_0_1_function_ids.cpu_off) ||
+              (func_id == kvm_host_psci_0_1_function_ids.migrate);
+}
+
+static bool is_psci_0_2_call(u64 func_id)
+{
+       /* SMCCC reserves IDs 0x00-1F with the given 32/64-bit base for PSCI. */
+       return (PSCI_0_2_FN(0) <= func_id && func_id <= PSCI_0_2_FN(31)) ||
+              (PSCI_0_2_FN64(0) <= func_id && func_id <= PSCI_0_2_FN64(31));
+}
+
+static bool is_psci_call(u64 func_id)
+{
+       switch (kvm_host_psci_version) {
+       case PSCI_VERSION(0, 1):
+               return is_psci_0_1_call(func_id);
+       default:
+               return is_psci_0_2_call(func_id);
+       }
+}
+
+static unsigned long psci_call(unsigned long fn, unsigned long arg0,
+                              unsigned long arg1, unsigned long arg2)
+{
+       struct arm_smccc_res res;
+
+       arm_smccc_1_1_smc(fn, arg0, arg1, arg2, &res);
+       return res.a0;
+}
+
+static unsigned long psci_forward(struct kvm_cpu_context *host_ctxt)
+{
+       return psci_call(cpu_reg(host_ctxt, 0), cpu_reg(host_ctxt, 1),
+                        cpu_reg(host_ctxt, 2), cpu_reg(host_ctxt, 3));
+}
+
+static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *host_ctxt)
+{
+       psci_forward(host_ctxt);
+       hyp_panic(); /* unreachable */
+}
+
+static unsigned int find_cpu_id(u64 mpidr)
+{
+       unsigned int i;
+
+       /* Reject invalid MPIDRs */
+       if (mpidr & ~MPIDR_HWID_BITMASK)
+               return INVALID_CPU_ID;
+
+       for (i = 0; i < NR_CPUS; i++) {
+               if (cpu_logical_map(i) == mpidr)
+                       return i;
+       }
+
+       return INVALID_CPU_ID;
+}
+
+static __always_inline bool try_acquire_boot_args(struct psci_boot_args *args)
+{
+       return atomic_cmpxchg_acquire(&args->lock,
+                                     PSCI_BOOT_ARGS_UNLOCKED,
+                                     PSCI_BOOT_ARGS_LOCKED) ==
+               PSCI_BOOT_ARGS_UNLOCKED;
+}
+
+static __always_inline void release_boot_args(struct psci_boot_args *args)
+{
+       atomic_set_release(&args->lock, PSCI_BOOT_ARGS_UNLOCKED);
+}
+
+static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(u64, mpidr, host_ctxt, 1);
+       DECLARE_REG(unsigned long, pc, host_ctxt, 2);
+       DECLARE_REG(unsigned long, r0, host_ctxt, 3);
+
+       unsigned int cpu_id;
+       struct psci_boot_args *boot_args;
+       struct kvm_nvhe_init_params *init_params;
+       int ret;
+
+       /*
+        * Find the logical CPU ID for the given MPIDR. The search set is
+        * the set of CPUs that were online at the point of KVM initialization.
+        * Booting other CPUs is rejected because their cpufeatures were not
+        * checked against the finalized capabilities. This could be relaxed
+        * by doing the feature checks in hyp.
+        */
+       cpu_id = find_cpu_id(mpidr);
+       if (cpu_id == INVALID_CPU_ID)
+               return PSCI_RET_INVALID_PARAMS;
+
+       boot_args = per_cpu_ptr(hyp_symbol_addr(cpu_on_args), cpu_id);
+       init_params = per_cpu_ptr(hyp_symbol_addr(kvm_init_params), cpu_id);
+
+       /* Check if the target CPU is already being booted. */
+       if (!try_acquire_boot_args(boot_args))
+               return PSCI_RET_ALREADY_ON;
+
+       boot_args->pc = pc;
+       boot_args->r0 = r0;
+       wmb();
+
+       ret = psci_call(func_id, mpidr,
+                       __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_entry)),
+                       __hyp_pa(init_params));
+
+       /* If successful, the lock will be released by the target CPU. */
+       if (ret != PSCI_RET_SUCCESS)
+               release_boot_args(boot_args);
+
+       return ret;
+}
+
+static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(u64, power_state, host_ctxt, 1);
+       DECLARE_REG(unsigned long, pc, host_ctxt, 2);
+       DECLARE_REG(unsigned long, r0, host_ctxt, 3);
+
+       struct psci_boot_args *boot_args;
+       struct kvm_nvhe_init_params *init_params;
+
+       boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
+       init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
+
+       /*
+        * No need to acquire a lock before writing to boot_args because a core
+        * can only suspend itself. Racy CPU_ON calls use a separate struct.
+        */
+       boot_args->pc = pc;
+       boot_args->r0 = r0;
+
+       /*
+        * Will either return directly for a shallow sleep state, or wake up
+        * at the entry point above for a deep sleep state.
+        */
+       return psci_call(func_id, power_state,
+                        __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
+                        __hyp_pa(init_params));
+}
+
+static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(unsigned long, pc, host_ctxt, 1);
+       DECLARE_REG(unsigned long, r0, host_ctxt, 2);
+
+       struct psci_boot_args *boot_args;
+       struct kvm_nvhe_init_params *init_params;
+
+       boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
+       init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
+
+       /*
+        * No need to acquire a lock before writing to boot_args because a core
+        * can only suspend itself. Racy CPU_ON calls use a separate struct.
+        */
+       boot_args->pc = pc;
+       boot_args->r0 = r0;
+
+       /* Will only return on error. */
+       return psci_call(func_id,
+                        __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
+                        __hyp_pa(init_params), 0);
+}
+
+asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
+{
+       struct psci_boot_args *boot_args;
+       struct kvm_cpu_context *host_ctxt;
+
+       host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt;
+
+       if (is_cpu_on)
+               boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args));
+       else
+               boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
+
+       cpu_reg(host_ctxt, 0) = boot_args->r0;
+       write_sysreg_el2(boot_args->pc, SYS_ELR);
+
+       if (is_cpu_on)
+               release_boot_args(boot_args);
+
+       __host_enter(host_ctxt);
+}
+
+static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+       if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) ||
+           (func_id == kvm_host_psci_0_1_function_ids.migrate))
+               return psci_forward(host_ctxt);
+       else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on)
+               return psci_cpu_on(func_id, host_ctxt);
+       else if (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend)
+               return psci_cpu_suspend(func_id, host_ctxt);
+       else
+               return PSCI_RET_NOT_SUPPORTED;
+}
+
+static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+       switch (func_id) {
+       case PSCI_0_2_FN_PSCI_VERSION:
+       case PSCI_0_2_FN_CPU_OFF:
+       case PSCI_0_2_FN64_AFFINITY_INFO:
+       case PSCI_0_2_FN64_MIGRATE:
+       case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+       case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+               return psci_forward(host_ctxt);
+       case PSCI_0_2_FN_SYSTEM_OFF:
+       case PSCI_0_2_FN_SYSTEM_RESET:
+               psci_forward_noreturn(host_ctxt);
+               unreachable();
+       case PSCI_0_2_FN64_CPU_SUSPEND:
+               return psci_cpu_suspend(func_id, host_ctxt);
+       case PSCI_0_2_FN64_CPU_ON:
+               return psci_cpu_on(func_id, host_ctxt);
+       default:
+               return PSCI_RET_NOT_SUPPORTED;
+       }
+}
+
+static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+       switch (func_id) {
+       case PSCI_1_0_FN_PSCI_FEATURES:
+       case PSCI_1_0_FN_SET_SUSPEND_MODE:
+       case PSCI_1_1_FN64_SYSTEM_RESET2:
+               return psci_forward(host_ctxt);
+       case PSCI_1_0_FN64_SYSTEM_SUSPEND:
+               return psci_system_suspend(func_id, host_ctxt);
+       default:
+               return psci_0_2_handler(func_id, host_ctxt);
+       }
+}
+
+bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt)
+{
+       u64 func_id = get_psci_func_id(host_ctxt);
+       unsigned long ret;
+
+       if (!is_psci_call(func_id))
+               return false;
+
+       switch (kvm_host_psci_version) {
+       case PSCI_VERSION(0, 1):
+               ret = psci_0_1_handler(func_id, host_ctxt);
+               break;
+       case PSCI_VERSION(0, 2):
+               ret = psci_0_2_handler(func_id, host_ctxt);
+               break;
+       default:
+               ret = psci_1_0_handler(func_id, host_ctxt);
+               break;
+       }
+
+       cpu_reg(host_ctxt, 0) = ret;
+       cpu_reg(host_ctxt, 1) = 0;
+       cpu_reg(host_ctxt, 2) = 0;
+       cpu_reg(host_ctxt, 3) = 0;
+       return true;
+}
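
The cpu_on_args handshake hinges on the acquire/release pairing around the lock word: the issuer publishes {pc, r0} before the wmb() and the PSCI call, and on success ownership of the lock travels with the target CPU, which releases it from kvm_host_psci_cpu_entry() once the args have been consumed. A stand-alone C11 model of just the lock word, under those assumptions:

    #include <stdatomic.h>
    #include <stdbool.h>

    enum { UNLOCKED = 0, LOCKED = 1 };  /* mirrors PSCI_BOOT_ARGS_* */

    static bool try_acquire(atomic_int *lock)
    {
        int expected = UNLOCKED;

        /* acquire: reads of pc/r0 cannot be hoisted above the lock */
        return atomic_compare_exchange_strong_explicit(lock, &expected,
                                                       LOCKED,
                                                       memory_order_acquire,
                                                       memory_order_relaxed);
    }

    static void release(atomic_int *lock)
    {
        /* release: earlier consumption of pc/r0 completes first */
        atomic_store_explicit(lock, UNLOCKED, memory_order_release);
    }
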
index 8ae8160..f3d0e9e 100644
@@ -4,6 +4,7 @@
  * Author: Marc Zyngier <marc.zyngier@arm.com>
  */
 
+#include <hyp/adjust_pc.h>
 #include <hyp/switch.h>
 #include <hyp/sysreg-sr.h>
 
@@ -96,7 +97,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
        mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
 
        write_sysreg(mdcr_el2, mdcr_el2);
-       write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
+       if (is_protected_kvm_enabled())
+               write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2);
+       else
+               write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
        write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
        write_sysreg(__kvm_hyp_host_vector, vbar_el2);
 }
@@ -189,6 +193,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 
        __sysreg_save_state_nvhe(host_ctxt);
 
+       __adjust_pc(vcpu);
+
        /*
         * We must restore the 32-bit state before the sysregs, thanks
         * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
index 88a25fc..2930502 100644
@@ -33,14 +33,3 @@ void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
        __sysreg_restore_user_state(ctxt);
        __sysreg_restore_el2_return_state(ctxt);
 }
-
-void __kvm_enable_ssbs(void)
-{
-       u64 tmp;
-
-       asm volatile(
-       "mrs    %0, sctlr_el2\n"
-       "orr    %0, %0, %1\n"
-       "msr    sctlr_el2, %0"
-       : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS));
-}
diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S
deleted file mode 100644
index b0441db..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2015-2018 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#include <linux/arm-smccc.h>
-#include <linux/linkage.h>
-
-#include <asm/kvm_asm.h>
-#include <asm/kvm_mmu.h>
-
-       /*
-        * This is not executed directly and is instead copied into the vectors
-        * by install_bp_hardening_cb().
-        */
-       .data
-       .pushsection    .rodata
-       .global         __smccc_workaround_1_smc
-SYM_DATA_START(__smccc_workaround_1_smc)
-       esb
-       sub     sp, sp, #(8 * 4)
-       stp     x2, x3, [sp, #(8 * 0)]
-       stp     x0, x1, [sp, #(8 * 2)]
-       mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_1
-       smc     #0
-       ldp     x2, x3, [sp, #(8 * 0)]
-       ldp     x0, x1, [sp, #(8 * 2)]
-       add     sp, sp, #(8 * 4)
-1:     .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ
-       .org 1b
-SYM_DATA_END(__smccc_workaround_1_smc)
index bd1bab5..8f05856 100644
@@ -4,6 +4,8 @@
  * Author: Marc Zyngier <marc.zyngier@arm.com>
  */
 
+#include <hyp/adjust_pc.h>
+
 #include <linux/compiler.h>
 #include <linux/irqchip/arm-gic.h>
 #include <linux/kvm_host.h>
index 452f4ca..80406f4 100644
@@ -4,6 +4,8 @@
  * Author: Marc Zyngier <marc.zyngier@arm.com>
  */
 
+#include <hyp/adjust_pc.h>
+
 #include <linux/compiler.h>
 #include <linux/irqchip/arm-gic-v3.h>
 #include <linux/kvm_host.h>
index 461e97c..96bec0e 100644
@@ -8,4 +8,4 @@ ccflags-y := -D__KVM_VHE_HYPERVISOR__
 
 obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o
 obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
-        ../fpsimd.o ../hyp-entry.o
+        ../fpsimd.o ../hyp-entry.o ../exception.o
index 62546e2..af8e940 100644
@@ -4,6 +4,7 @@
  * Author: Marc Zyngier <marc.zyngier@arm.com>
  */
 
+#include <hyp/adjust_pc.h>
 #include <hyp/switch.h>
 
 #include <linux/arm-smccc.h>
@@ -133,6 +134,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
        __load_guest_stage2(vcpu->arch.hw_mmu);
        __activate_traps(vcpu);
 
+       __adjust_pc(vcpu);
+
        sysreg_restore_guest_state_vhe(guest_ctxt);
        __debug_switch_to_guest(vcpu);
 
index 34a96ab..b47df73 100644
 #include <asm/kvm_emulate.h>
 #include <asm/esr.h>
 
-#define CURRENT_EL_SP_EL0_VECTOR       0x0
-#define CURRENT_EL_SP_ELx_VECTOR       0x200
-#define LOWER_EL_AArch64_VECTOR                0x400
-#define LOWER_EL_AArch32_VECTOR                0x600
-
-enum exception_type {
-       except_type_sync        = 0,
-       except_type_irq         = 0x80,
-       except_type_fiq         = 0x100,
-       except_type_serror      = 0x180,
-};
-
-/*
- * This performs the exception entry at a given EL (@target_mode), stashing PC
- * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE.
- * The EL passed to this function *must* be a non-secure, privileged mode with
- * bit 0 being set (PSTATE.SP == 1).
- *
- * When an exception is taken, most PSTATE fields are left unchanged in the
- * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all
- * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx
- * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0.
- *
- * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429.
- * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426.
- *
- * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from
- * MSB to LSB.
- */
-static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
-                             enum exception_type type)
-{
-       unsigned long sctlr, vbar, old, new, mode;
-       u64 exc_offset;
-
-       mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
-
-       if      (mode == target_mode)
-               exc_offset = CURRENT_EL_SP_ELx_VECTOR;
-       else if ((mode | PSR_MODE_THREAD_BIT) == target_mode)
-               exc_offset = CURRENT_EL_SP_EL0_VECTOR;
-       else if (!(mode & PSR_MODE32_BIT))
-               exc_offset = LOWER_EL_AArch64_VECTOR;
-       else
-               exc_offset = LOWER_EL_AArch32_VECTOR;
-
-       switch (target_mode) {
-       case PSR_MODE_EL1h:
-               vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1);
-               sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
-               vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1);
-               break;
-       default:
-               /* Don't do that */
-               BUG();
-       }
-
-       *vcpu_pc(vcpu) = vbar + exc_offset + type;
-
-       old = *vcpu_cpsr(vcpu);
-       new = 0;
-
-       new |= (old & PSR_N_BIT);
-       new |= (old & PSR_Z_BIT);
-       new |= (old & PSR_C_BIT);
-       new |= (old & PSR_V_BIT);
-
-       // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
-
-       new |= (old & PSR_DIT_BIT);
-
-       // PSTATE.UAO is set to zero upon any exception to AArch64
-       // See ARM DDI 0487E.a, page D5-2579.
-
-       // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0
-       // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented
-       // See ARM DDI 0487E.a, page D5-2578.
-       new |= (old & PSR_PAN_BIT);
-       if (!(sctlr & SCTLR_EL1_SPAN))
-               new |= PSR_PAN_BIT;
-
-       // PSTATE.SS is set to zero upon any exception to AArch64
-       // See ARM DDI 0487E.a, page D2-2452.
-
-       // PSTATE.IL is set to zero upon any exception to AArch64
-       // See ARM DDI 0487E.a, page D1-2306.
-
-       // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64
-       // See ARM DDI 0487E.a, page D13-3258
-       if (sctlr & SCTLR_ELx_DSSBS)
-               new |= PSR_SSBS_BIT;
-
-       // PSTATE.BTYPE is set to zero upon any exception to AArch64
-       // See ARM DDI 0487E.a, pages D1-2293 to D1-2294.
-
-       new |= PSR_D_BIT;
-       new |= PSR_A_BIT;
-       new |= PSR_I_BIT;
-       new |= PSR_F_BIT;
-
-       new |= target_mode;
-
-       *vcpu_cpsr(vcpu) = new;
-       vcpu_write_spsr(vcpu, old);
-}
-
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
        unsigned long cpsr = *vcpu_cpsr(vcpu);
        bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
        u32 esr = 0;
 
-       enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
+       vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1          |
+                            KVM_ARM64_EXCEPT_AA64_ELx_SYNC     |
+                            KVM_ARM64_PENDING_EXCEPTION);
 
        vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
 
@@ -156,7 +52,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 {
        u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 
-       enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
+       vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1          |
+                            KVM_ARM64_EXCEPT_AA64_ELx_SYNC     |
+                            KVM_ARM64_PENDING_EXCEPTION);
 
        /*
         * Build an unknown exception, depending on the instruction
@@ -168,6 +66,53 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
        vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
 }
 
+#define DFSR_FSC_EXTABT_LPAE   0x10
+#define DFSR_FSC_EXTABT_nLPAE  0x08
+#define DFSR_LPAE              BIT(9)
+#define TTBCR_EAE              BIT(31)
+
+static void inject_undef32(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND |
+                            KVM_ARM64_PENDING_EXCEPTION);
+}
+
+/*
+ * Modelled after the TakeDataAbortException() and TakePrefetchAbortException()
+ * pseudocode.
+ */
+static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
+{
+       u64 far;
+       u32 fsr;
+
+       /* Give the guest an IMPLEMENTATION DEFINED exception */
+       if (vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE) {
+               fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE;
+       } else {
+               /* no need to shuffle FS[4] into DFSR[10] as it's 0 */
+               fsr = DFSR_FSC_EXTABT_nLPAE;
+       }
+
+       far = vcpu_read_sys_reg(vcpu, FAR_EL1);
+
+       if (is_pabt) {
+               vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT |
+                                    KVM_ARM64_PENDING_EXCEPTION);
+               far &= GENMASK(31, 0);
+               far |= (u64)addr << 32;
+               vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2);
+       } else { /* !iabt */
+               vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT |
+                                    KVM_ARM64_PENDING_EXCEPTION);
+               far &= GENMASK(63, 32);
+               far |= addr;
+               vcpu_write_sys_reg(vcpu, fsr, ESR_EL1);
+       }
+
+       vcpu_write_sys_reg(vcpu, far, FAR_EL1);
+}
+
 /**
  * kvm_inject_dabt - inject a data abort into the guest
  * @vcpu: The VCPU to receive the data abort
@@ -179,7 +124,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
        if (vcpu_el1_is_32bit(vcpu))
-               kvm_inject_dabt32(vcpu, addr);
+               inject_abt32(vcpu, false, addr);
        else
                inject_abt64(vcpu, false, addr);
 }
@@ -195,7 +140,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
        if (vcpu_el1_is_32bit(vcpu))
-               kvm_inject_pabt32(vcpu, addr);
+               inject_abt32(vcpu, true, addr);
        else
                inject_abt64(vcpu, true, addr);
 }
@@ -210,7 +155,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
 void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 {
        if (vcpu_el1_is_32bit(vcpu))
-               kvm_inject_undef32(vcpu);
+               inject_undef32(vcpu);
        else
                inject_undef64(vcpu);
 }
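
The net effect of this file's rewrite is that injection is split in two: the syndrome registers (FAR/ESR/IFSR) are still written eagerly here, while the PC/PSTATE/SPSR vectoring is deferred to hyp and keyed off the vcpu flags. End to end, using the names from this series:

    /* EL1: record what to inject; syndrome registers written eagerly. */
    kvm_inject_undefined(vcpu);     /* sets KVM_ARM64_PENDING_EXCEPTION */

    /* hyp, on the next guest entry (see __adjust_pc() earlier): */
    if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
        kvm_inject_exception(vcpu); /* performs the PC/PSTATE/SPSR rewrite */
        vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
                              KVM_ARM64_EXCEPT_MASK);
    }
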
index 6a2826f..3e2d8ba 100644
@@ -115,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
         * The MMIO instruction is emulated and should not be re-executed
         * in the guest.
         */
-       kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+       kvm_incr_pc(vcpu);
 
        return 0;
 }
index 75814a0..7d2257c 100644
@@ -1023,7 +1023,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
                 * cautious, and skip the instruction.
                 */
                if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
-                       kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+                       kvm_incr_pc(vcpu);
                        ret = 1;
                        goto out_unlock;
                }
index 2ed5ef8..398f6df 100644
@@ -384,7 +384,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        bool overflow;
 
-       if (!kvm_arm_pmu_v3_ready(vcpu))
+       if (!kvm_vcpu_has_pmu(vcpu))
                return;
 
        overflow = !!kvm_pmu_overflow_status(vcpu);
@@ -825,9 +825,12 @@ bool kvm_arm_support_pmu_v3(void)
 
 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
 {
-       if (!vcpu->arch.pmu.created)
+       if (!kvm_vcpu_has_pmu(vcpu))
                return 0;
 
+       if (!vcpu->arch.pmu.created)
+               return -EINVAL;
+
        /*
         * A valid interrupt configuration for the PMU is either to have a
         * properly configured interrupt number and using an in-kernel
@@ -835,9 +838,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
         */
        if (irqchip_in_kernel(vcpu->kvm)) {
                int irq = vcpu->arch.pmu.irq_num;
-               if (!kvm_arm_pmu_irq_initialized(vcpu))
-                       return -EINVAL;
-
                /*
                 * If we are using an in-kernel vgic, at this point we know
                 * the vgic will be initialized, so we can check the PMU irq
@@ -851,7 +851,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
        }
 
        kvm_pmu_vcpu_reset(vcpu);
-       vcpu->arch.pmu.ready = true;
 
        return 0;
 }
@@ -913,8 +912,7 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
 
 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 {
-       if (!kvm_arm_support_pmu_v3() ||
-           !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
+       if (!kvm_vcpu_has_pmu(vcpu))
                return -ENODEV;
 
        if (vcpu->arch.pmu.created)
@@ -1015,7 +1013,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
                if (!irqchip_in_kernel(vcpu->kvm))
                        return -EINVAL;
 
-               if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
+               if (!kvm_vcpu_has_pmu(vcpu))
                        return -ENODEV;
 
                if (!kvm_arm_pmu_irq_initialized(vcpu))
@@ -1035,8 +1033,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
        case KVM_ARM_VCPU_PMU_V3_IRQ:
        case KVM_ARM_VCPU_PMU_V3_INIT:
        case KVM_ARM_VCPU_PMU_V3_FILTER:
-               if (kvm_arm_support_pmu_v3() &&
-                   test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
+               if (kvm_vcpu_has_pmu(vcpu))
                        return 0;
        }
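
kvm_vcpu_has_pmu(), which replaces the scattered kvm_arm_support_pmu_v3() and feature-bit checks throughout this file, was added earlier in the series as a thin wrapper; in essence (sketch, see <asm/kvm_host.h> for the real definition):

    #define kvm_vcpu_has_pmu(vcpu)                                  \
        (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
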
 
index 920ac43..78a09f7 100644
@@ -53,7 +53,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
        struct pvclock_vcpu_stolen_time init_values = {};
        struct kvm *kvm = vcpu->kvm;
        u64 base = vcpu->arch.steal.base;
-       int idx;
 
        if (base == GPA_INVALID)
                return base;
@@ -63,10 +62,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
         * the feature enabled.
         */
        vcpu->arch.steal.last_steal = current->sched_info.run_delay;
-
-       idx = srcu_read_lock(&kvm->srcu);
-       kvm_write_guest(kvm, base, &init_values, sizeof(init_values));
-       srcu_read_unlock(&kvm->srcu, idx);
+       kvm_write_guest_lock(kvm, base, &init_values, sizeof(init_values));
 
        return base;
 }
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
deleted file mode 100644
index accc1d5..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * Derived from arch/arm/kvm/emulate.c:
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- */
-
-#include <linux/mm.h>
-#include <linux/kvm_host.h>
-#include <asm/kvm_emulate.h>
-#include <asm/ptrace.h>
-
-#define VCPU_NR_MODES 6
-#define REG_OFFSET(_reg) \
-       (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
-
-#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
-
-static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
-       /* USR Registers */
-       {
-               USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
-               USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
-               USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
-               USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
-               USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14),
-               REG_OFFSET(pc)
-       },
-
-       /* FIQ Registers */
-       {
-               USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
-               USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
-               USR_REG_OFFSET(6), USR_REG_OFFSET(7),
-               REG_OFFSET(compat_r8_fiq),  /* r8 */
-               REG_OFFSET(compat_r9_fiq),  /* r9 */
-               REG_OFFSET(compat_r10_fiq), /* r10 */
-               REG_OFFSET(compat_r11_fiq), /* r11 */
-               REG_OFFSET(compat_r12_fiq), /* r12 */
-               REG_OFFSET(compat_sp_fiq),  /* r13 */
-               REG_OFFSET(compat_lr_fiq),  /* r14 */
-               REG_OFFSET(pc)
-       },
-
-       /* IRQ Registers */
-       {
-               USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
-               USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
-               USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
-               USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
-               USR_REG_OFFSET(12),
-               REG_OFFSET(compat_sp_irq), /* r13 */
-               REG_OFFSET(compat_lr_irq), /* r14 */
-               REG_OFFSET(pc)
-       },
-
-       /* SVC Registers */
-       {
-               USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
-               USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
-               USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
-               USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
-               USR_REG_OFFSET(12),
-               REG_OFFSET(compat_sp_svc), /* r13 */
-               REG_OFFSET(compat_lr_svc), /* r14 */
-               REG_OFFSET(pc)
-       },
-
-       /* ABT Registers */
-       {
-               USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
-               USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
-               USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
-               USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
-               USR_REG_OFFSET(12),
-               REG_OFFSET(compat_sp_abt), /* r13 */
-               REG_OFFSET(compat_lr_abt), /* r14 */
-               REG_OFFSET(pc)
-       },
-
-       /* UND Registers */
-       {
-               USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
-               USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
-               USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
-               USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
-               USR_REG_OFFSET(12),
-               REG_OFFSET(compat_sp_und), /* r13 */
-               REG_OFFSET(compat_lr_und), /* r14 */
-               REG_OFFSET(pc)
-       },
-};
-
-/*
- * Return a pointer to the register number valid in the current mode of
- * the virtual CPU.
- */
-unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
-{
-       unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs;
-       unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK;
-
-       switch (mode) {
-       case PSR_AA32_MODE_USR ... PSR_AA32_MODE_SVC:
-               mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */
-               break;
-
-       case PSR_AA32_MODE_ABT:
-               mode = 4;
-               break;
-
-       case PSR_AA32_MODE_UND:
-               mode = 5;
-               break;
-
-       case PSR_AA32_MODE_SYS:
-               mode = 0;       /* SYS maps to USR */
-               break;
-
-       default:
-               BUG();
-       }
-
-       return reg_array + vcpu_reg_offsets[mode][reg_num];
-}
-
-/*
- * Return the SPSR for the current mode of the virtual CPU.
- */
-static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu)
-{
-       unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK;
-       switch (mode) {
-       case PSR_AA32_MODE_SVC: return KVM_SPSR_SVC;
-       case PSR_AA32_MODE_ABT: return KVM_SPSR_ABT;
-       case PSR_AA32_MODE_UND: return KVM_SPSR_UND;
-       case PSR_AA32_MODE_IRQ: return KVM_SPSR_IRQ;
-       case PSR_AA32_MODE_FIQ: return KVM_SPSR_FIQ;
-       default: BUG();
-       }
-}
-
-unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
-{
-       int spsr_idx = vcpu_spsr32_mode(vcpu);
-
-       if (!vcpu->arch.sysregs_loaded_on_cpu) {
-               switch (spsr_idx) {
-               case KVM_SPSR_SVC:
-                       return __vcpu_sys_reg(vcpu, SPSR_EL1);
-               case KVM_SPSR_ABT:
-                       return vcpu->arch.ctxt.spsr_abt;
-               case KVM_SPSR_UND:
-                       return vcpu->arch.ctxt.spsr_und;
-               case KVM_SPSR_IRQ:
-                       return vcpu->arch.ctxt.spsr_irq;
-               case KVM_SPSR_FIQ:
-                       return vcpu->arch.ctxt.spsr_fiq;
-               }
-       }
-
-       switch (spsr_idx) {
-       case KVM_SPSR_SVC:
-               return read_sysreg_el1(SYS_SPSR);
-       case KVM_SPSR_ABT:
-               return read_sysreg(spsr_abt);
-       case KVM_SPSR_UND:
-               return read_sysreg(spsr_und);
-       case KVM_SPSR_IRQ:
-               return read_sysreg(spsr_irq);
-       case KVM_SPSR_FIQ:
-               return read_sysreg(spsr_fiq);
-       default:
-               BUG();
-       }
-}
-
-void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
-{
-       int spsr_idx = vcpu_spsr32_mode(vcpu);
-
-       if (!vcpu->arch.sysregs_loaded_on_cpu) {
-               switch (spsr_idx) {
-               case KVM_SPSR_SVC:
-                       __vcpu_sys_reg(vcpu, SPSR_EL1) = v;
-                       break;
-               case KVM_SPSR_ABT:
-                       vcpu->arch.ctxt.spsr_abt = v;
-                       break;
-               case KVM_SPSR_UND:
-                       vcpu->arch.ctxt.spsr_und = v;
-                       break;
-               case KVM_SPSR_IRQ:
-                       vcpu->arch.ctxt.spsr_irq = v;
-                       break;
-               case KVM_SPSR_FIQ:
-                       vcpu->arch.ctxt.spsr_fiq = v;
-                       break;
-               }
-
-               return;
-       }
-
-       switch (spsr_idx) {
-       case KVM_SPSR_SVC:
-               write_sysreg_el1(v, SYS_SPSR);
-               break;
-       case KVM_SPSR_ABT:
-               write_sysreg(v, spsr_abt);
-               break;
-       case KVM_SPSR_UND:
-               write_sysreg(v, spsr_und);
-               break;
-       case KVM_SPSR_IRQ:
-               write_sysreg(v, spsr_irq);
-               break;
-       case KVM_SPSR_FIQ:
-               write_sysreg(v, spsr_fiq);
-               break;
-       }
-}
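For context, the deleted vcpu_reg32() resolved an AArch32 register number to
its banked storage; a usage sketch, with the guest in IRQ mode:

	/* Sketch: looking up banked r13 (SP) for a 32-bit guest. */
	unsigned long *sp = vcpu_reg32(vcpu, 13);
	/* CPSR.M == IRQ selects row 2 of vcpu_reg_offsets, so r13
	 * resolves to REG_OFFSET(compat_sp_irq) within ctxt.regs. */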
index f324902..47f3f03 100644 (file)
@@ -25,7 +25,6 @@
 #include <asm/ptrace.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
-#include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
 #include <asm/virt.h>
@@ -42,58 +41,6 @@ static u32 kvm_ipa_limit;
 #define VCPU_RESET_PSTATE_SVC  (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
                                 PSR_AA32_I_BIT | PSR_AA32_F_BIT)
 
-static bool system_has_full_ptr_auth(void)
-{
-       return system_supports_address_auth() && system_supports_generic_auth();
-}
-
-/**
- * kvm_arch_vm_ioctl_check_extension
- *
- * We currently assume that the number of HW registers is uniform
- * across all CPUs (see cpuinfo_sanity_check).
- */
-int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
-{
-       int r;
-
-       switch (ext) {
-       case KVM_CAP_ARM_EL1_32BIT:
-               r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1);
-               break;
-       case KVM_CAP_GUEST_DEBUG_HW_BPS:
-               r = get_num_brps();
-               break;
-       case KVM_CAP_GUEST_DEBUG_HW_WPS:
-               r = get_num_wrps();
-               break;
-       case KVM_CAP_ARM_PMU_V3:
-               r = kvm_arm_support_pmu_v3();
-               break;
-       case KVM_CAP_ARM_INJECT_SERROR_ESR:
-               r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
-               break;
-       case KVM_CAP_SET_GUEST_DEBUG:
-       case KVM_CAP_VCPU_ATTRIBUTES:
-               r = 1;
-               break;
-       case KVM_CAP_ARM_VM_IPA_SIZE:
-               r = kvm_ipa_limit;
-               break;
-       case KVM_CAP_ARM_SVE:
-               r = system_supports_sve();
-               break;
-       case KVM_CAP_ARM_PTRAUTH_ADDRESS:
-       case KVM_CAP_ARM_PTRAUTH_GENERIC:
-               r = system_has_full_ptr_auth();
-               break;
-       default:
-               r = 0;
-       }
-
-       return r;
-}
-
 unsigned int kvm_sve_max_vl;
 
 int kvm_arm_init_sve(void)
@@ -286,6 +233,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
                        pstate = VCPU_RESET_PSTATE_EL1;
                }
 
+               if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
+                       ret = -EINVAL;
+                       goto out;
+               }
                break;
        }
 
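The new reset-time check makes the failure visible to userspace; a hedged
sketch of the effect (fd setup and error handling omitted):

	struct kvm_vcpu_init init = { .target = KVM_ARM_TARGET_GENERIC_V8 };
	init.features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
	/* On a host without PMUv3, KVM_ARM_VCPU_INIT now fails with
	 * -EINVAL at kvm_reset_vcpu() instead of succeeding. */
	ret = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);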
index c1fac98..3313ded 100644 (file)
@@ -20,7 +20,6 @@
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
 #include <asm/kvm_arm.h>
-#include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
@@ -64,87 +63,6 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
        return false;
 }
 
-static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
-{
-       /*
-        * System registers listed in the switch are not saved on every
-        * exit from the guest but are only saved on vcpu_put.
-        *
-        * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
-        * should never be listed below, because the guest cannot modify its
-        * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
-        * thread when emulating cross-VCPU communication.
-        */
-       switch (reg) {
-       case CSSELR_EL1:        *val = read_sysreg_s(SYS_CSSELR_EL1);   break;
-       case SCTLR_EL1:         *val = read_sysreg_s(SYS_SCTLR_EL12);   break;
-       case CPACR_EL1:         *val = read_sysreg_s(SYS_CPACR_EL12);   break;
-       case TTBR0_EL1:         *val = read_sysreg_s(SYS_TTBR0_EL12);   break;
-       case TTBR1_EL1:         *val = read_sysreg_s(SYS_TTBR1_EL12);   break;
-       case TCR_EL1:           *val = read_sysreg_s(SYS_TCR_EL12);     break;
-       case ESR_EL1:           *val = read_sysreg_s(SYS_ESR_EL12);     break;
-       case AFSR0_EL1:         *val = read_sysreg_s(SYS_AFSR0_EL12);   break;
-       case AFSR1_EL1:         *val = read_sysreg_s(SYS_AFSR1_EL12);   break;
-       case FAR_EL1:           *val = read_sysreg_s(SYS_FAR_EL12);     break;
-       case MAIR_EL1:          *val = read_sysreg_s(SYS_MAIR_EL12);    break;
-       case VBAR_EL1:          *val = read_sysreg_s(SYS_VBAR_EL12);    break;
-       case CONTEXTIDR_EL1:    *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break;
-       case TPIDR_EL0:         *val = read_sysreg_s(SYS_TPIDR_EL0);    break;
-       case TPIDRRO_EL0:       *val = read_sysreg_s(SYS_TPIDRRO_EL0);  break;
-       case TPIDR_EL1:         *val = read_sysreg_s(SYS_TPIDR_EL1);    break;
-       case AMAIR_EL1:         *val = read_sysreg_s(SYS_AMAIR_EL12);   break;
-       case CNTKCTL_EL1:       *val = read_sysreg_s(SYS_CNTKCTL_EL12); break;
-       case ELR_EL1:           *val = read_sysreg_s(SYS_ELR_EL12);     break;
-       case PAR_EL1:           *val = read_sysreg_par();               break;
-       case DACR32_EL2:        *val = read_sysreg_s(SYS_DACR32_EL2);   break;
-       case IFSR32_EL2:        *val = read_sysreg_s(SYS_IFSR32_EL2);   break;
-       case DBGVCR32_EL2:      *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
-       default:                return false;
-       }
-
-       return true;
-}
-
-static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
-{
-       /*
-        * System registers listed in the switch are not restored on every
-        * entry to the guest but are only restored on vcpu_load.
-        *
-        * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
-        * should never be listed below, because the MPIDR should only be set
-        * once, before running the VCPU, and never changed later.
-        */
-       switch (reg) {
-       case CSSELR_EL1:        write_sysreg_s(val, SYS_CSSELR_EL1);    break;
-       case SCTLR_EL1:         write_sysreg_s(val, SYS_SCTLR_EL12);    break;
-       case CPACR_EL1:         write_sysreg_s(val, SYS_CPACR_EL12);    break;
-       case TTBR0_EL1:         write_sysreg_s(val, SYS_TTBR0_EL12);    break;
-       case TTBR1_EL1:         write_sysreg_s(val, SYS_TTBR1_EL12);    break;
-       case TCR_EL1:           write_sysreg_s(val, SYS_TCR_EL12);      break;
-       case ESR_EL1:           write_sysreg_s(val, SYS_ESR_EL12);      break;
-       case AFSR0_EL1:         write_sysreg_s(val, SYS_AFSR0_EL12);    break;
-       case AFSR1_EL1:         write_sysreg_s(val, SYS_AFSR1_EL12);    break;
-       case FAR_EL1:           write_sysreg_s(val, SYS_FAR_EL12);      break;
-       case MAIR_EL1:          write_sysreg_s(val, SYS_MAIR_EL12);     break;
-       case VBAR_EL1:          write_sysreg_s(val, SYS_VBAR_EL12);     break;
-       case CONTEXTIDR_EL1:    write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break;
-       case TPIDR_EL0:         write_sysreg_s(val, SYS_TPIDR_EL0);     break;
-       case TPIDRRO_EL0:       write_sysreg_s(val, SYS_TPIDRRO_EL0);   break;
-       case TPIDR_EL1:         write_sysreg_s(val, SYS_TPIDR_EL1);     break;
-       case AMAIR_EL1:         write_sysreg_s(val, SYS_AMAIR_EL12);    break;
-       case CNTKCTL_EL1:       write_sysreg_s(val, SYS_CNTKCTL_EL12);  break;
-       case ELR_EL1:           write_sysreg_s(val, SYS_ELR_EL12);      break;
-       case PAR_EL1:           write_sysreg_s(val, SYS_PAR_EL1);       break;
-       case DACR32_EL2:        write_sysreg_s(val, SYS_DACR32_EL2);    break;
-       case IFSR32_EL2:        write_sysreg_s(val, SYS_IFSR32_EL2);    break;
-       case DBGVCR32_EL2:      write_sysreg_s(val, SYS_DBGVCR32_EL2);  break;
-       default:                return false;
-       }
-
-       return true;
-}
-
 u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
 {
        u64 val = 0x8badf00d8badf00d;
@@ -169,7 +87,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
 static u32 cache_levels;
 
 /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
-#define CSSELR_MAX 12
+#define CSSELR_MAX 14
 
 /* Which cache CCSIDR represents depends on CSSELR value. */
 static u32 get_ccsidr(u32 csselr)
@@ -209,6 +127,24 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
        return true;
 }
 
+static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift)
+{
+       switch (r->aarch32_map) {
+       case AA32_LO:
+               *mask = GENMASK_ULL(31, 0);
+               *shift = 0;
+               break;
+       case AA32_HI:
+               *mask = GENMASK_ULL(63, 32);
+               *shift = 32;
+               break;
+       default:
+               *mask = GENMASK_ULL(63, 0);
+               *shift = 0;
+               break;
+       }
+}
+
 /*
  * Generic accessor for VM registers. Only called as long as HCR_TVM
  * is set. If the guest enables the MMU, we stop trapping the VM
@@ -219,26 +155,21 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
                          const struct sys_reg_desc *r)
 {
        bool was_enabled = vcpu_has_cache_enabled(vcpu);
-       u64 val;
-       int reg = r->reg;
+       u64 val, mask, shift;
 
        BUG_ON(!p->is_write);
 
-       /* See the 32bit mapping in kvm_host.h */
-       if (p->is_aarch32)
-               reg = r->reg / 2;
+       get_access_mask(r, &mask, &shift);
 
-       if (!p->is_aarch32 || !p->is_32bit) {
-               val = p->regval;
+       if (~mask) {
+               val = vcpu_read_sys_reg(vcpu, r->reg);
+               val &= ~mask;
        } else {
-               val = vcpu_read_sys_reg(vcpu, reg);
-               if (r->reg % 2)
-                       val = (p->regval << 32) | (u64)lower_32_bits(val);
-               else
-                       val = ((u64)upper_32_bits(val) << 32) |
-                               lower_32_bits(p->regval);
+               val = 0;
        }
-       vcpu_write_sys_reg(vcpu, val, reg);
+
+       val |= (p->regval & (mask >> shift)) << shift;
+       vcpu_write_sys_reg(vcpu, val, r->reg);
 
        kvm_toggle_cache(vcpu, was_enabled);
        return true;
@@ -248,17 +179,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu,
                         struct sys_reg_params *p,
                         const struct sys_reg_desc *r)
 {
+       u64 mask, shift;
+
        if (p->is_write)
                return ignore_write(vcpu, p);
 
-       p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
-
-       if (p->is_aarch32) {
-               if (r->Op2 & 2)
-                       p->regval = upper_32_bits(p->regval);
-               else
-                       p->regval = lower_32_bits(p->regval);
-       }
+       get_access_mask(r, &mask, &shift);
+       p->regval = (vcpu_read_sys_reg(vcpu, r->reg) & mask) >> shift;
 
        return true;
 }
@@ -285,7 +212,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
         * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure
         * group.
         */
-       if (p->is_aarch32) {
+       if (p->Op0 == 0) {              /* AArch32 */
                switch (p->Op1) {
                default:                /* Keep GCC quiet */
                case 0:                 /* ICC_SGI1R */
@@ -296,7 +223,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
                        g1 = false;
                        break;
                }
-       } else {
+       } else {                        /* AArch64 */
                switch (p->Op2) {
                default:                /* Keep GCC quiet */
                case 5:                 /* ICC_SGI1R_EL1 */
@@ -438,26 +365,30 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
  */
 static void reg_to_dbg(struct kvm_vcpu *vcpu,
                       struct sys_reg_params *p,
+                      const struct sys_reg_desc *rd,
                       u64 *dbg_reg)
 {
-       u64 val = p->regval;
+       u64 mask, shift, val;
 
-       if (p->is_32bit) {
-               val &= 0xffffffffUL;
-               val |= ((*dbg_reg >> 32) << 32);
-       }
+       get_access_mask(rd, &mask, &shift);
 
+       val = *dbg_reg;
+       val &= ~mask;
+       val |= (p->regval & (mask >> shift)) << shift;
        *dbg_reg = val;
+
        vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
 }
 
 static void dbg_to_reg(struct kvm_vcpu *vcpu,
                       struct sys_reg_params *p,
+                      const struct sys_reg_desc *rd,
                       u64 *dbg_reg)
 {
-       p->regval = *dbg_reg;
-       if (p->is_32bit)
-               p->regval &= 0xffffffffUL;
+       u64 mask, shift;
+
+       get_access_mask(rd, &mask, &shift);
+       p->regval = (*dbg_reg & mask) >> shift;
 }
 
 static bool trap_bvr(struct kvm_vcpu *vcpu,
@@ -467,9 +398,9 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
        u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
 
        if (p->is_write)
-               reg_to_dbg(vcpu, p, dbg_reg);
+               reg_to_dbg(vcpu, p, rd, dbg_reg);
        else
-               dbg_to_reg(vcpu, p, dbg_reg);
+               dbg_to_reg(vcpu, p, rd, dbg_reg);
 
        trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
 
@@ -509,9 +440,9 @@ static bool trap_bcr(struct kvm_vcpu *vcpu,
        u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
 
        if (p->is_write)
-               reg_to_dbg(vcpu, p, dbg_reg);
+               reg_to_dbg(vcpu, p, rd, dbg_reg);
        else
-               dbg_to_reg(vcpu, p, dbg_reg);
+               dbg_to_reg(vcpu, p, rd, dbg_reg);
 
        trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
 
@@ -552,9 +483,9 @@ static bool trap_wvr(struct kvm_vcpu *vcpu,
        u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
 
        if (p->is_write)
-               reg_to_dbg(vcpu, p, dbg_reg);
+               reg_to_dbg(vcpu, p, rd, dbg_reg);
        else
-               dbg_to_reg(vcpu, p, dbg_reg);
+               dbg_to_reg(vcpu, p, rd, dbg_reg);
 
        trace_trap_reg(__func__, rd->reg, p->is_write,
                vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
@@ -595,9 +526,9 @@ static bool trap_wcr(struct kvm_vcpu *vcpu,
        u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
 
        if (p->is_write)
-               reg_to_dbg(vcpu, p, dbg_reg);
+               reg_to_dbg(vcpu, p, rd, dbg_reg);
        else
-               dbg_to_reg(vcpu, p, dbg_reg);
+               dbg_to_reg(vcpu, p, rd, dbg_reg);
 
        trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
 
@@ -678,8 +609,9 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
 {
        u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0);
-       bool enabled = (reg & flags) || vcpu_mode_priv(vcpu);
+       bool enabled = kvm_vcpu_has_pmu(vcpu);
 
+       enabled &= (reg & flags) || vcpu_mode_priv(vcpu);
        if (!enabled)
                kvm_inject_undefined(vcpu);
 
@@ -711,9 +643,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 {
        u64 val;
 
-       if (!kvm_arm_pmu_v3_ready(vcpu))
-               return trap_raz_wi(vcpu, p, r);
-
        if (pmu_access_el0_disabled(vcpu))
                return false;
 
@@ -740,9 +669,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
                          const struct sys_reg_desc *r)
 {
-       if (!kvm_arm_pmu_v3_ready(vcpu))
-               return trap_raz_wi(vcpu, p, r);
-
        if (pmu_access_event_counter_el0_disabled(vcpu))
                return false;
 
@@ -761,9 +687,6 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 {
        u64 pmceid;
 
-       if (!kvm_arm_pmu_v3_ready(vcpu))
-               return trap_raz_wi(vcpu, p, r);
-
        BUG_ON(p->is_write);
 
        if (pmu_access_el0_disabled(vcpu))
@@ -794,10 +717,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
                              struct sys_reg_params *p,
                              const struct sys_reg_desc *r)
 {
-       u64 idx;
-
-       if (!kvm_arm_pmu_v3_ready(vcpu))
-               return trap_raz_wi(vcpu, p, r);
+       u64 idx = ~0UL;
 
        if (r->CRn == 9 && r->CRm == 13) {
                if (r->Op2 == 2) {
@@ -813,8 +733,6 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
                                return false;
 
                        idx = ARMV8_PMU_CYCLE_IDX;
-               } else {
-                       return false;
                }
        } else if (r->CRn == 0 && r->CRm == 9) {
                /* PMCCNTR */
@@ -828,10 +746,11 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
                        return false;
 
                idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
-       } else {
-               return false;
        }
 
+       /* Catch any decoding mistake */
+       WARN_ON(idx == ~0UL);
+
        if (!pmu_counter_idx_valid(vcpu, idx))
                return false;
 
@@ -852,9 +771,6 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 {
        u64 idx, reg;
 
-       if (!kvm_arm_pmu_v3_ready(vcpu))
-               return trap_raz_wi(vcpu, p, r);
-
        if (pmu_access_el0_disabled(vcpu))
                return false;
 
@@ -892,9 +808,6 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 {
        u64 val, mask;
 
-       if (!kvm_arm_pmu_v3_ready(vcpu))
-               return trap_raz_wi(vcpu, p, r);
-
        if (pmu_access_el0_disabled(vcpu))
                return false;
 
@@ -923,13 +836,8 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 {
        u64 mask = kvm_pmu_valid_counter_mask(vcpu);
 
-       if (!kvm_arm_pmu_v3_ready(vcpu))
-               return trap_raz_wi(vcpu, p, r);
-
-       if (!vcpu_mode_priv(vcpu)) {
-               kvm_inject_undefined(vcpu);
+       if (check_pmu_access_disabled(vcpu, 0))
                return false;
-       }
 
        if (p->is_write) {
                u64 val = p->regval & mask;
@@ -952,9 +860,6 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 {
        u64 mask = kvm_pmu_valid_counter_mask(vcpu);
 
-       if (!kvm_arm_pmu_v3_ready(vcpu))
-               return trap_raz_wi(vcpu, p, r);
-
        if (pmu_access_el0_disabled(vcpu))
                return false;
 
@@ -977,9 +882,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 {
        u64 mask;
 
-       if (!kvm_arm_pmu_v3_ready(vcpu))
-               return trap_raz_wi(vcpu, p, r);
-
        if (!p->is_write)
                return read_from_write_only(vcpu, p, r);
 
@@ -994,8 +896,10 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
                             const struct sys_reg_desc *r)
 {
-       if (!kvm_arm_pmu_v3_ready(vcpu))
-               return trap_raz_wi(vcpu, p, r);
+       if (!kvm_vcpu_has_pmu(vcpu)) {
+               kvm_inject_undefined(vcpu);
+               return false;
+       }
 
        if (p->is_write) {
                if (!vcpu_mode_priv(vcpu)) {
@@ -1122,6 +1026,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
                val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT);
                val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT);
                val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT);
+               val &= ~(0xfUL << ID_AA64PFR0_CSV3_SHIFT);
+               val |= ((u64)vcpu->kvm->arch.pfr0_csv3 << ID_AA64PFR0_CSV3_SHIFT);
        } else if (id == SYS_ID_AA64PFR1_EL1) {
                val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT);
        } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) {
@@ -1130,10 +1036,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
                         (0xfUL << ID_AA64ISAR1_GPA_SHIFT) |
                         (0xfUL << ID_AA64ISAR1_GPI_SHIFT));
        } else if (id == SYS_ID_AA64DFR0_EL1) {
+               u64 cap = 0;
+
                /* Limit guests to PMUv3 for ARMv8.1 */
+               if (kvm_vcpu_has_pmu(vcpu))
+                       cap = ID_AA64DFR0_PMUVER_8_1;
+
                val = cpuid_feature_cap_perfmon_field(val,
                                                ID_AA64DFR0_PMUVER_SHIFT,
-                                               ID_AA64DFR0_PMUVER_8_1);
+                                               cap);
        } else if (id == SYS_ID_DFR0_EL1) {
                /* Limit guests to PMUv3 for ARMv8.1 */
                val = cpuid_feature_cap_perfmon_field(val,
@@ -1209,9 +1120,9 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
                               const struct kvm_one_reg *reg, void __user *uaddr)
 {
        const u64 id = sys_reg_to_index(rd);
+       u8 csv2, csv3;
        int err;
        u64 val;
-       u8 csv2;
 
        err = reg_from_user(&val, uaddr, id);
        if (err)
@@ -1227,13 +1138,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
            (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED))
                return -EINVAL;
 
-       /* We can only differ with CSV2, and anything else is an error */
+       /* Same thing for CSV3 */
+       csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV3_SHIFT);
+       if (csv3 > 1 ||
+           (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED))
+               return -EINVAL;
+
+       /* We can only differ with CSV[23], and anything else is an error */
        val ^= read_id_reg(vcpu, rd, false);
-       val &= ~(0xFUL << ID_AA64PFR0_CSV2_SHIFT);
+       val &= ~((0xFUL << ID_AA64PFR0_CSV2_SHIFT) |
+                (0xFUL << ID_AA64PFR0_CSV3_SHIFT));
        if (val)
                return -EINVAL;
 
        vcpu->kvm->arch.pfr0_csv2 = csv2;
+       vcpu->kvm->arch.pfr0_csv3 = csv3;
 
        return 0;
 }
@@ -1327,10 +1246,6 @@ static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 {
        int reg = r->reg;
 
-       /* See the 32bit mapping in kvm_host.h */
-       if (p->is_aarch32)
-               reg = r->reg / 2;
-
        if (p->is_write)
                vcpu_write_sys_reg(vcpu, p->regval, reg);
        else
@@ -1801,66 +1716,27 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
        }
 }
 
-static bool trap_debug32(struct kvm_vcpu *vcpu,
-                        struct sys_reg_params *p,
-                        const struct sys_reg_desc *r)
-{
-       if (p->is_write) {
-               vcpu_cp14(vcpu, r->reg) = p->regval;
-               vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
-       } else {
-               p->regval = vcpu_cp14(vcpu, r->reg);
-       }
-
-       return true;
-}
-
-/* AArch32 debug register mappings
+/*
+ * AArch32 debug register mappings
  *
  * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0]
  * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32]
  *
- * All control registers and watchpoint value registers are mapped to
- * the lower 32 bits of their AArch64 equivalents. We share the trap
- * handlers with the above AArch64 code which checks what mode the
- * system is in.
+ * None of the other registers share their location, so treat them as
+ * if they were 64bit.
  */
-
-static bool trap_xvr(struct kvm_vcpu *vcpu,
-                    struct sys_reg_params *p,
-                    const struct sys_reg_desc *rd)
-{
-       u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
-
-       if (p->is_write) {
-               u64 val = *dbg_reg;
-
-               val &= 0xffffffffUL;
-               val |= p->regval << 32;
-               *dbg_reg = val;
-
-               vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
-       } else {
-               p->regval = *dbg_reg >> 32;
-       }
-
-       trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
-
-       return true;
-}
-
-#define DBG_BCR_BVR_WCR_WVR(n)                                         \
-       /* DBGBVRn */                                                   \
-       { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n },     \
-       /* DBGBCRn */                                                   \
-       { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n },     \
-       /* DBGWVRn */                                                   \
-       { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n },     \
-       /* DBGWCRn */                                                   \
+#define DBG_BCR_BVR_WCR_WVR(n)                                               \
+       /* DBGBVRn */                                                         \
+       { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \
+       /* DBGBCRn */                                                         \
+       { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n },           \
+       /* DBGWVRn */                                                         \
+       { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n },           \
+       /* DBGWCRn */                                                         \
        { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n }
 
-#define DBGBXVR(n)                                                     \
-       { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n }
+#define DBGBXVR(n)                                                           \
+       { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_bvr, NULL, n }
 
 /*
  * Trapped cp14 registers. We generally ignore most of the external
@@ -1878,9 +1754,9 @@ static const struct sys_reg_desc cp14_regs[] = {
        { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
        DBG_BCR_BVR_WCR_WVR(1),
        /* DBGDCCINT */
-       { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT },
+       { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug_regs, NULL, MDCCINT_EL1 },
        /* DBGDSCRext */
-       { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext },
+       { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug_regs, NULL, MDSCR_EL1 },
        DBG_BCR_BVR_WCR_WVR(2),
        /* DBGDTR[RT]Xint */
        { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
@@ -1895,7 +1771,7 @@ static const struct sys_reg_desc cp14_regs[] = {
        { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
        DBG_BCR_BVR_WCR_WVR(6),
        /* DBGVCR */
-       { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR },
+       { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug_regs, NULL, DBGVCR32_EL2 },
        DBG_BCR_BVR_WCR_WVR(7),
        DBG_BCR_BVR_WCR_WVR(8),
        DBG_BCR_BVR_WCR_WVR(9),
@@ -1981,19 +1857,29 @@ static const struct sys_reg_desc cp14_64_regs[] = {
  */
 static const struct sys_reg_desc cp15_regs[] = {
        { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr },
-       { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
-       { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr },
-       { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr },
-       { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
-       { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
-       { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
-       { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
-       { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
-       { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
-       { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
-       { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
-       { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
-       { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
+       { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, SCTLR_EL1 },
+       /* ACTLR */
+       { AA32(LO), Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr, NULL, ACTLR_EL1 },
+       /* ACTLR2 */
+       { AA32(HI), Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr, NULL, ACTLR_EL1 },
+       { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 },
+       { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, TTBR1_EL1 },
+       /* TTBCR */
+       { AA32(LO), Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, TCR_EL1 },
+       /* TTBCR2 */
+       { AA32(HI), Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, TCR_EL1 },
+       { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, DACR32_EL2 },
+       /* DFSR */
+       { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, ESR_EL1 },
+       { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, IFSR32_EL2 },
+       /* ADFSR */
+       { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, AFSR0_EL1 },
+       /* AIFSR */
+       { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, AFSR1_EL1 },
+       /* DFAR */
+       { AA32(LO), Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, FAR_EL1 },
+       /* IFAR */
+       { AA32(HI), Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, FAR_EL1 },
 
        /*
         * DC{C,I,CI}SW operations:
@@ -2019,15 +1905,19 @@ static const struct sys_reg_desc cp15_regs[] = {
        { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
        { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
 
-       { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
-       { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
-       { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
-       { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
+       /* PRRR/MAIR0 */
+       { AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 },
+       /* NMRR/MAIR1 */
+       { AA32(HI), Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, MAIR_EL1 },
+       /* AMAIR0 */
+       { AA32(LO), Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, AMAIR_EL1 },
+       /* AMAIR1 */
+       { AA32(HI), Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, AMAIR_EL1 },
 
        /* ICC_SRE */
        { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
 
-       { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+       { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, CONTEXTIDR_EL1 },
 
        /* Arch Tmers */
        { SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer },
@@ -2102,14 +1992,14 @@ static const struct sys_reg_desc cp15_regs[] = {
 
        { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr },
        { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr },
-       { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, c0_CSSELR },
+       { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 },
 };
 
 static const struct sys_reg_desc cp15_64_regs[] = {
-       { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+       { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 },
        { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
        { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
-       { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+       { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 },
        { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
        { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
        { SYS_DESC(SYS_AARCH32_CNTP_CVAL),    access_arch_timer },
@@ -2180,7 +2070,7 @@ static void perform_access(struct kvm_vcpu *vcpu,
 
        /* Skip instruction if instructed so */
        if (likely(r->access(vcpu, params, r)))
-               kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+               kvm_incr_pc(vcpu);
 }
 
 /*
@@ -2253,8 +2143,6 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
        int Rt = kvm_vcpu_sys_get_rt(vcpu);
        int Rt2 = (esr >> 10) & 0x1f;
 
-       params.is_aarch32 = true;
-       params.is_32bit = false;
        params.CRm = (esr >> 1) & 0xf;
        params.is_write = ((esr & 1) == 0);
 
@@ -2304,8 +2192,6 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
        u32 esr = kvm_vcpu_get_esr(vcpu);
        int Rt  = kvm_vcpu_sys_get_rt(vcpu);
 
-       params.is_aarch32 = true;
-       params.is_32bit = true;
        params.CRm = (esr >> 1) & 0xf;
        params.regval = vcpu_get_reg(vcpu, Rt);
        params.is_write = ((esr & 1) == 0);
@@ -2399,8 +2285,6 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
 
        trace_kvm_handle_sys_reg(esr);
 
-       params.is_aarch32 = false;
-       params.is_32bit = false;
        params.Op0 = (esr >> 20) & 3;
        params.Op1 = (esr >> 14) & 0x7;
        params.CRn = (esr >> 10) & 0xf;
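The mask/shift scheme above subsumes the old ad-hoc 32-bit splitting; a worked
sketch for an AA32(HI) register such as TTBCR2, which maps onto TCR_EL1[63:32]:

	/* Sketch: what access_vm_reg() computes for an AA32_HI write. */
	u64 mask = GENMASK_ULL(63, 32), shift = 32;
	u64 val = vcpu_read_sys_reg(vcpu, TCR_EL1);	/* ~mask != 0: preserve */
	val &= ~mask;					/* clear bits [63:32] */
	val |= (p->regval & (mask >> shift)) << shift;	/* merge 32-bit value */
	vcpu_write_sys_reg(vcpu, val, TCR_EL1);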
index 0f95964..9d06214 100644 (file)
@@ -19,14 +19,18 @@ struct sys_reg_params {
        u8      Op2;
        u64     regval;
        bool    is_write;
-       bool    is_aarch32;
-       bool    is_32bit;       /* Only valid if is_aarch32 is true */
 };
 
 struct sys_reg_desc {
        /* Sysreg string for debug */
        const char *name;
 
+       enum {
+               AA32_ZEROHIGH,
+               AA32_LO,
+               AA32_HI,
+       } aarch32_map;
+
        /* MRS/MSR instruction which accesses it. */
        u8      Op0;
        u8      Op1;
@@ -153,6 +157,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
                                          const struct sys_reg_desc table[],
                                          unsigned int num);
 
+#define AA32(_x)       .aarch32_map = AA32_##_x
 #define Op0(_x)        .Op0 = _x
 #define Op1(_x)        .Op1 = _x
 #define CRn(_x)                .CRn = _x
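With the aarch32_map field in place, a table entry written with the AA32()
initializer spells out to the following; shown for the IFAR entry, purely for
illustration:

	/* Sketch: designated-initializer form of the IFAR -> FAR_EL1 entry. */
	{ .aarch32_map = AA32_HI,
	  .Op1 = 0, .CRn = 6, .CRm = 0, .Op2 = 2,
	  .access = access_vm_reg, .reset = NULL, .reg = FAR_EL1 },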
index e0404bc..d8cc51b 100644 (file)
@@ -11,6 +11,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/insn.h>
 #include <asm/kvm_mmu.h>
+#include <asm/memory.h>
 
 /*
  * The LSB of the HYP VA tag
@@ -23,6 +24,30 @@ static u64 tag_val;
 static u64 va_mask;
 
 /*
+ * Compute HYP VA by using the same computation as kern_hyp_va().
+ */
+static u64 __early_kern_hyp_va(u64 addr)
+{
+       addr &= va_mask;
+       addr |= tag_val << tag_lsb;
+       return addr;
+}
+
+/*
+ * Store a hyp VA <-> PA offset into a hyp-owned variable.
+ */
+static void init_hyp_physvirt_offset(void)
+{
+       extern s64 kvm_nvhe_sym(hyp_physvirt_offset);
+       u64 kern_va, hyp_va;
+
+       /* Compute the offset from the hyp VA and PA of a random symbol. */
+       kern_va = (u64)kvm_ksym_ref(__hyp_text_start);
+       hyp_va = __early_kern_hyp_va(kern_va);
+       CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va;
+}
+
+/*
  * We want to generate a hyp VA with the following format (with V ==
  * vabits_actual):
  *
@@ -53,6 +78,8 @@ __init void kvm_compute_layout(void)
                tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb);
        }
        tag_val >>= tag_lsb;
+
+       init_hyp_physvirt_offset();
 }
 
 static u32 compute_instruction(int n, u32 rd, u32 rn)
@@ -131,28 +158,21 @@ void __init kvm_update_va_mask(struct alt_instr *alt,
        }
 }
 
-void *__kvm_bp_vect_base;
-int __kvm_harden_el2_vector_slot;
-
 void kvm_patch_vector_branch(struct alt_instr *alt,
                             __le32 *origptr, __le32 *updptr, int nr_inst)
 {
        u64 addr;
        u32 insn;
 
-       BUG_ON(nr_inst != 5);
+       BUG_ON(nr_inst != 4);
 
-       if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
-               WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS));
+       if (!cpus_have_const_cap(ARM64_SPECTRE_V3A) || WARN_ON_ONCE(has_vhe()))
                return;
-       }
 
        /*
         * Compute HYP VA by using the same computation as kern_hyp_va()
         */
-       addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector);
-       addr &= va_mask;
-       addr |= tag_val << tag_lsb;
+       addr = __early_kern_hyp_va((u64)kvm_ksym_ref(__kvm_hyp_vector));
 
        /* Use PC[10:7] to branch to the same vector in KVM */
        addr |= ((u64)origptr & GENMASK_ULL(10, 7));
@@ -163,15 +183,6 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
         */
        addr += KVM_VECTOR_PREAMBLE;
 
-       /* stp x0, x1, [sp, #-16]! */
-       insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0,
-                                               AARCH64_INSN_REG_1,
-                                               AARCH64_INSN_REG_SP,
-                                               -16,
-                                               AARCH64_INSN_VARIANT_64BIT,
-                                               AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX);
-       *updptr++ = cpu_to_le32(insn);
-
        /* movz x0, #(addr & 0xffff) */
        insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
                                         (u16)addr,
@@ -201,3 +212,58 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
                                           AARCH64_INSN_BRANCH_NOLINK);
        *updptr++ = cpu_to_le32(insn);
 }
+
+static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+       u32 insn, oinsn, rd;
+
+       BUG_ON(nr_inst != 4);
+
+       /* Compute target register */
+       oinsn = le32_to_cpu(*origptr);
+       rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
+
+       /* movz rd, #(val & 0xffff) */
+       insn = aarch64_insn_gen_movewide(rd,
+                                        (u16)val,
+                                        0,
+                                        AARCH64_INSN_VARIANT_64BIT,
+                                        AARCH64_INSN_MOVEWIDE_ZERO);
+       *updptr++ = cpu_to_le32(insn);
+
+       /* movk rd, #((val >> 16) & 0xffff), lsl #16 */
+       insn = aarch64_insn_gen_movewide(rd,
+                                        (u16)(val >> 16),
+                                        16,
+                                        AARCH64_INSN_VARIANT_64BIT,
+                                        AARCH64_INSN_MOVEWIDE_KEEP);
+       *updptr++ = cpu_to_le32(insn);
+
+       /* movk rd, #((val >> 32) & 0xffff), lsl #32 */
+       insn = aarch64_insn_gen_movewide(rd,
+                                        (u16)(val >> 32),
+                                        32,
+                                        AARCH64_INSN_VARIANT_64BIT,
+                                        AARCH64_INSN_MOVEWIDE_KEEP);
+       *updptr++ = cpu_to_le32(insn);
+
+       /* movk rd, #((val >> 48) & 0xffff), lsl #48 */
+       insn = aarch64_insn_gen_movewide(rd,
+                                        (u16)(val >> 48),
+                                        48,
+                                        AARCH64_INSN_VARIANT_64BIT,
+                                        AARCH64_INSN_MOVEWIDE_KEEP);
+       *updptr++ = cpu_to_le32(insn);
+}
+
+void kvm_update_kimg_phys_offset(struct alt_instr *alt,
+                                __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+       generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst);
+}
+
+void kvm_get_kimage_voffset(struct alt_instr *alt,
+                           __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+       generate_mov_q(kimage_voffset, origptr, updptr, nr_inst);
+}
index 2f92bdc..07d5271 100644 (file)
@@ -268,8 +268,6 @@ int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
 
        params.regval = *reg;
        params.is_write = is_write;
-       params.is_aarch32 = false;
-       params.is_32bit = false;
 
        if (find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
                              ARRAY_SIZE(gic_v3_icc_reg_descs)))
@@ -288,8 +286,6 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id,
        if (is_write)
                params.regval = *reg;
        params.is_write = is_write;
-       params.is_aarch32 = false;
-       params.is_32bit = false;
 
        r = find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
                           ARRAY_SIZE(gic_v3_icc_reg_descs));
index b5fa73c..66508b0 100644 (file)
@@ -353,6 +353,18 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
        return err;
 }
 
+void vgic_v4_commit(struct kvm_vcpu *vcpu)
+{
+       struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+
+       /*
+        * No need to wait for the vPE to be ready across a shallow guest
+        * exit, as only a vcpu_put will invalidate it.
+        */
+       if (!vpe->ready)
+               its_commit_vpe(vpe);
+}
+
 static struct vgic_its *vgic_get_its(struct kvm *kvm,
                                     struct kvm_kernel_irq_routing_entry *irq_entry)
 {
index c3643b7..1c597c9 100644 (file)
@@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
        if (can_access_vgic_from_kernel())
                vgic_restore_state(vcpu);
+
+       if (vgic_supports_direct_msis(vcpu->kvm))
+               vgic_v4_commit(vcpu);
 }
 
 void kvm_vgic_load(struct kvm_vcpu *vcpu)
index 351537c..9e1a12e 100644 (file)
@@ -149,3 +149,19 @@ SYM_FUNC_START(mte_restore_page_tags)
 
        ret
 SYM_FUNC_END(mte_restore_page_tags)
+
+/*
+ * Assign allocation tags for a region of memory based on the pointer tag
+ *   x0 - source pointer
+ *   x1 - size
+ *
+ * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned, and
+ * the size must be non-zero and MTE_GRANULE_SIZE aligned.
+ */
+SYM_FUNC_START(mte_assign_mem_tag_range)
+1:     stg     x0, [x0]
+       add     x0, x0, #MTE_GRANULE_SIZE
+       subs    x1, x1, #MTE_GRANULE_SIZE
+       b.gt    1b
+       ret
+SYM_FUNC_END(mte_assign_mem_tag_range)
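The routine is a straight STG loop, one allocation tag per granule, taken from
the logical tag already carried in the x0 pointer; a C rendering for
illustration (the _c suffix is hypothetical, the real entry point is the
assembly above):

	static void mte_assign_mem_tag_range_c(void *addr, size_t size)
	{
		do {
			/* store the tag embedded in the pointer itself */
			asm volatile("stg %0, [%0]" : : "r"(addr) : "memory");
			addr += MTE_GRANULE_SIZE;
			size -= MTE_GRANULE_SIZE;
		} while (size > 0);
	}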
index 70a71f3..b5447e5 100644 (file)
@@ -23,6 +23,15 @@ void copy_highpage(struct page *to, struct page *from)
 
        if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) {
                set_bit(PG_mte_tagged, &to->flags);
+               page_kasan_tag_reset(to);
+               /*
+                * We need smp_wmb() in between setting the flags and clearing the
+                * tags because if another thread reads page->flags and builds a
+                * tagged address out of it, there is an actual dependency on the
+                * memory access, but on the current thread we do not guarantee that
+                * the new page->flags are visible before the tags are updated.
+                */
+               smp_wmb();
                mte_copy_page_tags(kto, kfrom);
        }
 }
index 2848952..3c40da4 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/hardirq.h>
 #include <linux/init.h>
+#include <linux/kasan.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/page-flags.h>
@@ -33,6 +34,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
 #include <asm/kprobes.h>
+#include <asm/mte.h>
 #include <asm/processor.h>
 #include <asm/sysreg.h>
 #include <asm/system_misc.h>
@@ -296,6 +298,57 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
        do_exit(SIGKILL);
 }
 
+#ifdef CONFIG_KASAN_HW_TAGS
+static void report_tag_fault(unsigned long addr, unsigned int esr,
+                            struct pt_regs *regs)
+{
+       bool is_write = ((esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT) != 0;
+
+       /*
+        * SAS bits aren't set for all faults reported in EL1, so we can't
+        * find out access size.
+        */
+       kasan_report(addr, 0, is_write, regs->pc);
+}
+#else
+/* Tag faults aren't enabled without CONFIG_KASAN_HW_TAGS. */
+static inline void report_tag_fault(unsigned long addr, unsigned int esr,
+                                   struct pt_regs *regs) { }
+#endif
+
+static void do_tag_recovery(unsigned long addr, unsigned int esr,
+                          struct pt_regs *regs)
+{
+       static bool reported;
+
+       if (!READ_ONCE(reported)) {
+               report_tag_fault(addr, esr, regs);
+               WRITE_ONCE(reported, true);
+       }
+
+       /*
+        * Disable MTE Tag Checking on the local CPU for the current EL.
+        * It will be done lazily on the other CPUs when they will hit a
+        * tag fault.
+        */
+       sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_NONE);
+       isb();
+}
+
+static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
+{
+       unsigned int ec = ESR_ELx_EC(esr);
+       unsigned int fsc = esr & ESR_ELx_FSC;
+
+       if (ec != ESR_ELx_EC_DABT_CUR)
+               return false;
+
+       if (fsc == ESR_ELx_FSC_MTE)
+               return true;
+
+       return false;
+}
+
 static void __do_kernel_fault(unsigned long addr, unsigned int esr,
                              struct pt_regs *regs)
 {
@@ -312,6 +365,12 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
            "Ignoring spurious kernel translation fault at virtual address %016lx\n", addr))
                return;
 
+       if (is_el1_mte_sync_tag_check_fault(esr)) {
+               do_tag_recovery(addr, esr, regs);
+
+               return;
+       }
+
        if (is_el1_permission_fault(addr, esr, regs)) {
                if (esr & ESR_ELx_WNR)
                        msg = "write to read-only memory";
index 69d4251..75addb3 100644 (file)
@@ -295,6 +295,9 @@ void __init arm64_memblock_init(void)
        memstart_addr = round_down(memblock_start_of_DRAM(),
                                   ARM64_MEMSTART_ALIGN);
 
+       if ((memblock_end_of_DRAM() - memstart_addr) > linear_region_size)
+               pr_warn("Memory doesn't fit in the linear mapping, VA_BITS too small\n");
+
        /*
         * Remove the memory that we will not be able to cover with the
         * linear mapping. Take care not to clip the kernel which may be
index b24e43d..d8e66c7 100644 (file)
@@ -21,6 +21,8 @@
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
 
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+
 static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
 
 /*
@@ -208,7 +210,7 @@ static void __init clear_pgds(unsigned long start,
                set_pgd(pgd_offset_k(start), __pgd(0));
 }
 
-void __init kasan_init(void)
+static void __init kasan_init_shadow(void)
 {
        u64 kimg_shadow_start, kimg_shadow_end;
        u64 mod_shadow_start, mod_shadow_end;
@@ -269,8 +271,21 @@ void __init kasan_init(void)
 
        memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
        cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+}
 
-       /* At this point kasan is fully initialized. Enable error messages */
+static void __init kasan_init_depth(void)
+{
        init_task.kasan_depth = 0;
+}
+
+void __init kasan_init(void)
+{
+       kasan_init_shadow();
+       kasan_init_depth();
+#if defined(CONFIG_KASAN_GENERIC)
+       /* CONFIG_KASAN_SW_TAGS also requires kasan_init_sw_tags(). */
        pr_info("KernelAddressSanitizer initialized\n");
+#endif
 }
+
+#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
index 3028bac..07937b4 100644 (file)
@@ -47,24 +47,3 @@ int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
 {
        return !(((pfn << PAGE_SHIFT) + size) & ~PHYS_MASK);
 }
-
-#ifdef CONFIG_STRICT_DEVMEM
-
-#include <linux/ioport.h>
-
-/*
- * devmem_is_allowed() checks to see if /dev/mem access to a certain address
- * is valid. The argument is a physical page number.  We mimic x86 here by
- * disallowing access to system RAM as well as device-exclusive MMIO regions.
- * This effectively disable read()/write() on /dev/mem.
- */
-int devmem_is_allowed(unsigned long pfn)
-{
-       if (iomem_is_exclusive(pfn << PAGE_SHIFT))
-               return 0;
-       if (!page_is_ram(pfn))
-               return 1;
-       return 0;
-}
-
-#endif
index c52c184..7c4ef56 100644 (file)
@@ -53,6 +53,15 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page)
        if (!tags)
                return false;
 
+       page_kasan_tag_reset(page);
+       /*
+        * We need smp_wmb() in between setting the flags and clearing the
+        * tags because if another thread reads page->flags and builds a
+        * tagged address out of it, there is an actual dependency on the
+        * memory access, but on the current thread we do not guarantee that
+        * the new page->flags are visible before the tags are updated.
+        */
+       smp_wmb();
        mte_restore_page_tags(page_address(page), tags);
 
        return true;
index a0831bf..37a54b5 100644 (file)
 #define TCR_CACHE_FLAGS        TCR_IRGN_WBWA | TCR_ORGN_WBWA
 
 #ifdef CONFIG_KASAN_SW_TAGS
-#define TCR_KASAN_FLAGS TCR_TBI1 | TCR_TBID1
+#define TCR_KASAN_SW_FLAGS TCR_TBI1 | TCR_TBID1
 #else
-#define TCR_KASAN_FLAGS 0
+#define TCR_KASAN_SW_FLAGS 0
+#endif
+
+#ifdef CONFIG_KASAN_HW_TAGS
+#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1
+#else
+#define TCR_KASAN_HW_FLAGS 0
 #endif
 
 /*
@@ -427,6 +433,10 @@ SYM_FUNC_START(__cpu_setup)
         */
        mov_q   x5, MAIR_EL1_SET
 #ifdef CONFIG_ARM64_MTE
+       mte_tcr .req    x20
+
+       mov     mte_tcr, #0
+
        /*
         * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported
         * (ID_AA64PFR1_EL1[11:8] > 1).
@@ -447,6 +457,9 @@ SYM_FUNC_START(__cpu_setup)
        /* clear any pending tag check faults in TFSR*_EL1 */
        msr_s   SYS_TFSR_EL1, xzr
        msr_s   SYS_TFSRE0_EL1, xzr
+
+       /* set the TCR_EL1 bits */
+       mov_q   mte_tcr, TCR_KASAN_HW_FLAGS
 1:
 #endif
        msr     mair_el1, x5
@@ -456,7 +469,11 @@ SYM_FUNC_START(__cpu_setup)
         */
        mov_q   x10, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
                        TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
-                       TCR_TBI0 | TCR_A1 | TCR_KASAN_FLAGS
+                       TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS
+#ifdef CONFIG_ARM64_MTE
+       orr     x10, x10, mte_tcr
+       .unreq  mte_tcr
+#endif
        tcr_clear_errata_bits x10, x9, x5
 
 #ifdef CONFIG_ARM64_VA_BITS_52
index 807dc63..04137a8 100644 (file)
@@ -29,7 +29,7 @@
 enum address_markers_idx {
        PAGE_OFFSET_NR = 0,
        PAGE_END_NR,
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
        KASAN_START_NR,
 #endif
 };
@@ -37,7 +37,7 @@ enum address_markers_idx {
 static struct addr_marker address_markers[] = {
        { PAGE_OFFSET,                  "Linear Mapping start" },
        { 0 /* PAGE_END */,             "Linear Mapping end" },
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
        { 0 /* KASAN_SHADOW_START */,   "Kasan shadow start" },
        { KASAN_SHADOW_END,             "Kasan shadow end" },
 #endif
@@ -383,7 +383,7 @@ void ptdump_check_wx(void)
 static int ptdump_init(void)
 {
        address_markers[PAGE_END_NR].start_address = PAGE_END;
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
        address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START;
 #endif
        ptdump_initialize();
index b96ed8b..bfc00f2 100644 (file)
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
+441    common  epoll_pwait2                    sys_epoll_pwait2
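With syscall 441 wired up, user space can reach epoll_pwait2 through the raw
syscall interface. A minimal sketch (glibc had no wrapper at the time; a
64-bit timespec layout is assumed, and the last argument is the kernel
sigset size for the unused signal mask):

    #include <stdio.h>
    #include <sys/epoll.h>
    #include <sys/syscall.h>
    #include <time.h>
    #include <unistd.h>

    #ifndef __NR_epoll_pwait2
    #define __NR_epoll_pwait2 441
    #endif

    int main(void)
    {
            struct epoll_event ev[8];
            struct timespec ts = { .tv_sec = 0, .tv_nsec = 250000000 };
            int epfd = epoll_create1(0);

            /* Nanosecond-resolution timeout instead of epoll_pwait's ms */
            long n = syscall(__NR_epoll_pwait2, epfd, ev, 8, &ts,
                             NULL, 8 /* sizeof kernel sigset_t */);
            printf("epoll_pwait2 returned %ld\n", n);
            close(epfd);
            return 0;
    }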
index c55276e..bfd1b67 100644 (file)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0
 #
 # Usage: unwcheck.py FILE
index 4f7d4b4..674541f 100644 (file)
 
 # 68328, 68EZ328, 68VZ328
 
-obj-y                  += entry.o ints.o timers.o
-obj-$(CONFIG_M68328)   += m68328.o
-obj-$(CONFIG_M68EZ328) += m68EZ328.o
-obj-$(CONFIG_M68VZ328) += m68VZ328.o
+obj-y                  += entry.o ints.o timers.o m68328.o
 obj-$(CONFIG_ROM)      += romvec.o
 
+obj-$(CONFIG_DRAGEN2)  += dragen2.o
+obj-$(CONFIG_UCSIMM)   += ucsimm.o
+obj-$(CONFIG_UCDIMM)   += ucsimm.o
+
 extra-y                := head.o
diff --git a/arch/m68k/68000/dragen2.c b/arch/m68k/68000/dragen2.c
new file mode 100644 (file)
index 0000000..584893c
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 1993 Hamish Macdonald
+ *  Copyright (C) 1999 D. Jeff Dionne
+ *  Copyright (C) 2001 Georges Menie, Ken Desmet
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <asm/machdep.h>
+#include <asm/MC68VZ328.h>
+
+/***************************************************************************/
+/*                      Init DragonEngine II hardware                      */
+/***************************************************************************/
+
+static void dragen2_reset(void)
+{
+       local_irq_disable();
+
+#ifdef CONFIG_INIT_LCD
+       PBDATA |= 0x20;                         /* disable CCFL light */
+       PKDATA |= 0x4;                          /* disable LCD controller */
+       LCKCON = 0;
+#endif
+
+       __asm__ __volatile__(
+               "reset\n\t"
+               "moveal #0x04000000, %a0\n\t"
+               "moveal 0(%a0), %sp\n\t"
+               "moveal 4(%a0), %a0\n\t"
+               "jmp (%a0)"
+       );
+}
+
+void __init init_dragen2(char *command, int size)
+{
+       mach_reset = dragen2_reset;
+
+#ifdef CONFIG_DIRECT_IO_ACCESS
+       SCR = 0x10;                                     /* allow user access to internal registers */
+#endif
+
+       /* CSGB Init */
+       CSGBB = 0x4000;
+       CSB = 0x1a1;
+
+       /* CS8900 init */
+       /* PK3: hardware sleep function pin, active low */
+       PKSEL |= PK(3);                         /* select pin as I/O */
+       PKDIR |= PK(3);                         /* select pin as output */
+       PKDATA |= PK(3);                        /* set pin high */
+
+       /* PF5: hardware reset function pin, active high */
+       PFSEL |= PF(5);                         /* select pin as I/O */
+       PFDIR |= PF(5);                         /* select pin as output */
+       PFDATA &= ~PF(5);                       /* set pin low */
+
+       /* cs8900 hardware reset */
+       PFDATA |= PF(5);
+       { int i; for (i = 0; i < 32000; ++i); }
+       PFDATA &= ~PF(5);
+
+       /* INT1 enable (cs8900 IRQ) */
+       PDPOL &= ~PD(1);                        /* active high signal */
+       PDIQEG &= ~PD(1);
+       PDIRQEN |= PD(1);                       /* IRQ enabled */
+
+#ifdef CONFIG_INIT_LCD
+       /* initialize LCD controller */
+       LSSA = (long) screen_bits;
+       LVPW = 0x14;
+       LXMAX = 0x140;
+       LYMAX = 0xef;
+       LRRA = 0;
+       LPXCD = 3;
+       LPICF = 0x08;
+       LPOLCF = 0;
+       LCKCON = 0x80;
+       PCPDEN = 0xff;
+       PCSEL = 0;
+
+       /* Enable LCD controller */
+       PKDIR |= 0x4;
+       PKSEL |= 0x4;
+       PKDATA &= ~0x4;
+
+       /* Enable CCFL backlighting circuit */
+       PBDIR |= 0x20;
+       PBSEL |= 0x20;
+       PBDATA &= ~0x20;
+
+       /* contrast control register */
+       PFDIR |= 0x1;
+       PFSEL &= ~0x1;
+       PWMR = 0x037F;
+#endif
+}
index 419751b..eab08da 100644 (file)
@@ -1,10 +1,11 @@
 /***************************************************************************/
 
 /*
- *  m68328.c - 68328 specific config
+ *  m68328.c - 68328/68EZ328/68VZ328 specific config
  *
  *  Copyright (C) 1993 Hamish Macdonald
  *  Copyright (C) 1999 D. Jeff Dionne
+ *  Copyright (C) 2001 Georges Menie, Ken Desmet
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file COPYING in the main directory of this archive
 #include <linux/kernel.h>
 #include <linux/rtc.h>
 #include <asm/machdep.h>
-#include <asm/MC68328.h>
-#if defined(CONFIG_PILOT) || defined(CONFIG_INIT_LCD)
+
+#if defined(CONFIG_INIT_LCD) && defined(CONFIG_M68VZ328)
+#include "bootlogo-vz.h"
+#elif defined(CONFIG_PILOT) || defined(CONFIG_INIT_LCD)
 #include "bootlogo.h"
 #endif
 
-/***************************************************************************/
-
-int m68328_hwclk(int set, struct rtc_time *t);
+#include "m68328.h"
 
 /***************************************************************************/
 
-void m68328_reset (void)
+static void m68328_reset(void)
 {
   local_irq_disable();
   asm volatile ("moveal #0x10c00000, %a0;\n\t"
@@ -45,12 +46,19 @@ void m68328_reset (void)
 
 void __init config_BSP(char *command, int len)
 {
-  pr_info("68328 support D. Jeff Dionne <jeff@uclinux.org>\n");
-  pr_info("68328 support Kenneth Albanowski <kjahds@kjshds.com>\n");
-  pr_info("68328/Pilot support Bernhard Kuhn <kuhn@lpr.e-technik.tu-muenchen.de>\n");
+       mach_sched_init = hw_timer_init;
+       mach_hwclk = m68328_hwclk;
+       mach_reset = m68328_reset;
 
-  mach_hwclk = m68328_hwclk;
-  mach_reset = m68328_reset;
+#if   defined(CONFIG_PILOT) && defined(CONFIG_M68328)
+       mach_sched_init = NULL;
+#elif defined(CONFIG_UCSIMM)
+       init_ucsimm(command, len);
+#elif defined(CONFIG_UCDIMM)
+       init_ucsimm(command, len);
+#elif defined(CONFIG_DRAGEN2)
+       init_dragen2(command, len);
+#endif
 }
 
 /***************************************************************************/
diff --git a/arch/m68k/68000/m68328.h b/arch/m68k/68000/m68328.h
new file mode 100644 (file)
index 0000000..f6047c3
--- /dev/null
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+void init_dragen2(char *command, int size);
+void init_ucsimm(char *command, int size);
+struct rtc_time;
+int m68328_hwclk(int set, struct rtc_time *t);
diff --git a/arch/m68k/68000/m68EZ328.c b/arch/m68k/68000/m68EZ328.c
deleted file mode 100644 (file)
index 05f137d..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/***************************************************************************/
-
-/*
- *  m68EZ328.c - 68EZ328 specific config
- *
- *  Copyright (C) 1993 Hamish Macdonald
- *  Copyright (C) 1999 D. Jeff Dionne
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-/***************************************************************************/
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/rtc.h>
-#include <linux/pgtable.h>
-#include <asm/machdep.h>
-#include <asm/MC68EZ328.h>
-#ifdef CONFIG_UCSIMM
-#include <asm/bootstd.h>
-#endif
-
-/***************************************************************************/
-
-int m68328_hwclk(int set, struct rtc_time *t);
-
-/***************************************************************************/
-
-void m68ez328_reset(void)
-{
-  local_irq_disable();
-  asm volatile (
-    "moveal #0x10c00000, %a0;\n"
-    "moveb #0, 0xFFFFF300;\n"
-    "moveal 0(%a0), %sp;\n"
-    "moveal 4(%a0), %a0;\n"
-    "jmp (%a0);\n"
-    );
-}
-
-/***************************************************************************/
-
-unsigned char *cs8900a_hwaddr;
-static int errno;
-
-#ifdef CONFIG_UCSIMM
-_bsc0(char *, getserialnum)
-_bsc1(unsigned char *, gethwaddr, int, a)
-_bsc1(char *, getbenv, char *, a)
-#endif
-
-void __init config_BSP(char *command, int len)
-{
-  unsigned char *p;
-
-  pr_info("68EZ328 DragonBallEZ support (C) 1999 Rt-Control, Inc\n");
-
-#ifdef CONFIG_UCSIMM
-  pr_info("uCsimm serial string [%s]\n", getserialnum());
-  p = cs8900a_hwaddr = gethwaddr(0);
-  pr_info("uCsimm hwaddr %pM\n", p);
-
-  p = getbenv("APPEND");
-  if (p) strcpy(p,command);
-  else command[0] = 0;
-#endif
-
-  mach_sched_init = hw_timer_init;
-  mach_hwclk = m68328_hwclk;
-  mach_reset = m68ez328_reset;
-}
-
-/***************************************************************************/
diff --git a/arch/m68k/68000/m68VZ328.c b/arch/m68k/68000/m68VZ328.c
deleted file mode 100644 (file)
index ada87b2..0000000
+++ /dev/null
@@ -1,189 +0,0 @@
-/***************************************************************************/
-
-/*
- *  m68VZ328.c - 68VZ328 specific config
- *
- *  Copyright (C) 1993 Hamish Macdonald
- *  Copyright (C) 1999 D. Jeff Dionne
- *  Copyright (C) 2001 Georges Menie, Ken Desmet
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive
- * for more details.
- */
-
-/***************************************************************************/
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/kd.h>
-#include <linux/netdevice.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/rtc.h>
-#include <linux/pgtable.h>
-
-#include <asm/machdep.h>
-#include <asm/MC68VZ328.h>
-#include <asm/bootstd.h>
-
-#ifdef CONFIG_INIT_LCD
-#include "bootlogo-vz.h"
-#endif
-
-/***************************************************************************/
-
-int m68328_hwclk(int set, struct rtc_time *t);
-
-/***************************************************************************/
-/*                        Init Drangon Engine hardware                     */
-/***************************************************************************/
-#if defined(CONFIG_DRAGEN2)
-
-static void m68vz328_reset(void)
-{
-       local_irq_disable();
-
-#ifdef CONFIG_INIT_LCD
-       PBDATA |= 0x20;                         /* disable CCFL light */
-       PKDATA |= 0x4;                          /* disable LCD controller */
-       LCKCON = 0;
-#endif
-
-       __asm__ __volatile__(
-               "reset\n\t"
-               "moveal #0x04000000, %a0\n\t"
-               "moveal 0(%a0), %sp\n\t"
-               "moveal 4(%a0), %a0\n\t"
-               "jmp (%a0)"
-       );
-}
-
-static void __init init_hardware(char *command, int size)
-{
-#ifdef CONFIG_DIRECT_IO_ACCESS
-       SCR = 0x10;                                     /* allow user access to internal registers */
-#endif
-
-       /* CSGB Init */
-       CSGBB = 0x4000;
-       CSB = 0x1a1;
-
-       /* CS8900 init */
-       /* PK3: hardware sleep function pin, active low */
-       PKSEL |= PK(3);                         /* select pin as I/O */
-       PKDIR |= PK(3);                         /* select pin as output */
-       PKDATA |= PK(3);                        /* set pin high */
-
-       /* PF5: hardware reset function pin, active high */
-       PFSEL |= PF(5);                         /* select pin as I/O */
-       PFDIR |= PF(5);                         /* select pin as output */
-       PFDATA &= ~PF(5);                       /* set pin low */
-
-       /* cs8900 hardware reset */
-       PFDATA |= PF(5);
-       { int i; for (i = 0; i < 32000; ++i); }
-       PFDATA &= ~PF(5);
-
-       /* INT1 enable (cs8900 IRQ) */
-       PDPOL &= ~PD(1);                        /* active high signal */
-       PDIQEG &= ~PD(1);
-       PDIRQEN |= PD(1);                       /* IRQ enabled */
-
-#ifdef CONFIG_INIT_LCD
-       /* initialize LCD controller */
-       LSSA = (long) screen_bits;
-       LVPW = 0x14;
-       LXMAX = 0x140;
-       LYMAX = 0xef;
-       LRRA = 0;
-       LPXCD = 3;
-       LPICF = 0x08;
-       LPOLCF = 0;
-       LCKCON = 0x80;
-       PCPDEN = 0xff;
-       PCSEL = 0;
-
-       /* Enable LCD controller */
-       PKDIR |= 0x4;
-       PKSEL |= 0x4;
-       PKDATA &= ~0x4;
-
-       /* Enable CCFL backlighting circuit */
-       PBDIR |= 0x20;
-       PBSEL |= 0x20;
-       PBDATA &= ~0x20;
-
-       /* contrast control register */
-       PFDIR |= 0x1;
-       PFSEL &= ~0x1;
-       PWMR = 0x037F;
-#endif
-}
-
-/***************************************************************************/
-/*                      Init RT-Control uCdimm hardware                    */
-/***************************************************************************/
-#elif defined(CONFIG_UCDIMM)
-
-static void m68vz328_reset(void)
-{
-       local_irq_disable();
-       asm volatile (
-               "moveal #0x10c00000, %a0;\n\t"
-               "moveb #0, 0xFFFFF300;\n\t"
-               "moveal 0(%a0), %sp;\n\t"
-               "moveal 4(%a0), %a0;\n\t"
-               "jmp (%a0);\n"
-       );
-}
-
-unsigned char *cs8900a_hwaddr;
-static int errno;
-
-_bsc0(char *, getserialnum)
-_bsc1(unsigned char *, gethwaddr, int, a)
-_bsc1(char *, getbenv, char *, a)
-
-static void __init init_hardware(char *command, int size)
-{
-       char *p;
-
-       pr_info("uCdimm serial string [%s]\n", getserialnum());
-       p = cs8900a_hwaddr = gethwaddr(0);
-       pr_info("uCdimm hwaddr %pM\n", p);
-       p = getbenv("APPEND");
-       if (p)
-               strcpy(p, command);
-       else
-               command[0] = 0;
-}
-
-/***************************************************************************/
-#else
-
-static void m68vz328_reset(void)
-{
-}
-
-static void __init init_hardware(char *command, int size)
-{
-}
-
-/***************************************************************************/
-#endif
-/***************************************************************************/
-
-void __init config_BSP(char *command, int size)
-{
-       pr_info("68VZ328 DragonBallVZ support (c) 2001 Lineo, Inc.\n");
-
-       init_hardware(command, size);
-
-       mach_sched_init = hw_timer_init;
-       mach_hwclk = m68328_hwclk;
-       mach_reset = m68vz328_reset;
-}
-
-/***************************************************************************/
diff --git a/arch/m68k/68000/ucsimm.c b/arch/m68k/68000/ucsimm.c
new file mode 100644 (file)
index 0000000..7c6cbf6
--- /dev/null
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 1993 Hamish Macdonald
+ *  Copyright (C) 1999 D. Jeff Dionne
+ *  Copyright (C) 2001 Georges Menie, Ken Desmet
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+#include <linux/init.h>
+#include <asm/bootstd.h>
+#include <asm/machdep.h>
+#include <asm/MC68VZ328.h>
+
+
+#include "m68328.h"
+
+unsigned char *cs8900a_hwaddr;
+static int errno;
+
+_bsc0(char *, getserialnum)
+_bsc1(unsigned char *, gethwaddr, int, a)
+_bsc1(char *, getbenv, char *, a)
+
+void __init init_ucsimm(char *command, int size)
+{
+       char *p;
+
+       pr_info("uCsimm/uCdimm serial string [%s]\n", getserialnum());
+       p = cs8900a_hwaddr = gethwaddr(0);
+       pr_info("uCsimm/uCdimm hwaddr %pM\n", p);
+       p = getbenv("APPEND");
+       if (p)
+               strcpy(p, command);
+       else
+               command[0] = 0;
+}
index 7246aa5..f4d2397 100644 (file)
@@ -36,7 +36,7 @@ endchoice
 if M68KCLASSIC
 
 config M68000
-       bool "MC68000"
+       bool
        depends on !MMU
        select CPU_HAS_NO_BITFIELDS
        select CPU_HAS_NO_MULDIV64
@@ -103,7 +103,7 @@ config M68060
          processor, say Y. Otherwise, say N.
 
 config M68328
-       bool "MC68328"
+       bool
        depends on !MMU
        select LEGACY_TIMER_TICK
        select M68000
@@ -111,7 +111,7 @@ config M68328
          Motorola 68328 processor support.
 
 config M68EZ328
-       bool "MC68EZ328"
+       bool
        depends on !MMU
        select LEGACY_TIMER_TICK
        select M68000
@@ -119,7 +119,7 @@ config M68EZ328
           Motorola 68EZ328 processor support.
 
 config M68VZ328
-       bool "MC68VZ328"
+       bool
        depends on !MMU
        select LEGACY_TIMER_TICK
        select M68000
index cf6961d..4d59ec2 100644 (file)
@@ -145,14 +145,13 @@ config SUN3
 
          If you don't want to compile a kernel exclusively for a Sun 3, say N.
 
-endif # M68KCLASSIC
-
 config PILOT
        bool
 
 config PILOT3
        bool "Pilot 1000/5000, PalmPilot Personal/Pro, or PalmIII support"
-       depends on M68328
+       depends on !MMU
+       select M68328
        select PILOT
        help
          Support for the Palm Pilot 1000/5000, Personal/Pro and PalmIII.
@@ -165,19 +164,22 @@ config XCOPILOT_BUGS
 
 config UCSIMM
        bool "uCsimm module support"
-       depends on M68EZ328
+       depends on !MMU
+       select M68EZ328
        help
          Support for the Arcturus Networks uCsimm module.
 
 config UCDIMM
        bool "uDsimm module support"
-       depends on M68VZ328
+       depends on !MMU
+       select M68VZ328
        help
          Support for the Arcturus Networks uCdimm module.
 
 config DRAGEN2
        bool "DragenEngine II board support"
-       depends on M68VZ328
+       depends on !MMU
+       select M68VZ328
        help
          Support for the DragonEngine II board.
 
@@ -200,6 +202,8 @@ config MEMORY_RESERVE
        help
          Reserve certain memory regions on 68x328 based boards.
 
+endif # M68KCLASSIC
+
 config ARN5206
        bool "Arnewsh 5206 board support"
        depends on M5206
index e377b42..d1b7988 100644 (file)
@@ -106,8 +106,16 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_UCDIMM
        pr_info("uCdimm by Lineo, Inc. <www.lineo.com>\n");
 #endif
+#ifdef CONFIG_M68328
+       pr_info("68328 support D. Jeff Dionne <jeff@uclinux.org>\n");
+       pr_info("68328 support Kenneth Albanowski <kjahds@kjshds.com>\n");
+#endif
+#ifdef CONFIG_M68EZ328
+       pr_info("68EZ328 DragonBallEZ support (C) 1999 Rt-Control, Inc\n");
+#endif
 #ifdef CONFIG_M68VZ328
        pr_info("M68VZ328 support by Evan Stawnyczy <e@lineo.ca>\n");
+       pr_info("68VZ328 DragonBallVZ support (c) 2001 Lineo, Inc.\n");
 #endif
 #ifdef CONFIG_COLDFIRE
        pr_info("COLDFIRE port done by Greg Ungerer, gerg@snapgear.com\n");
@@ -121,6 +129,7 @@ void __init setup_arch(char **cmdline_p)
        pr_info("Flat model support (C) 1998,1999 Kenneth Albanowski, D. Jeff Dionne\n");
 
 #if defined( CONFIG_PILOT ) && defined( CONFIG_M68328 )
+       pr_info("68328/Pilot support Bernhard Kuhn <kuhn@lpr.e-technik.tu-muenchen.de>\n");
        pr_info("TRG SuperPilot FLASH card support <info@trgnet.com>\n");
 #endif
 #if defined( CONFIG_PILOT ) && defined( CONFIG_M68EZ328 )
index 625fb6d..7fe4e45 100644 (file)
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
+441    common  epoll_pwait2                    sys_epoll_pwait2
index 396e126..387f334 100644 (file)
@@ -81,7 +81,7 @@ SECTIONS {
                __init_end = .;
        }
 
-       BSS_SECTION(0, 0, 0)
+       BSS_SECTION(4, 0, 4)
 
        _end = .;
 
index aae729c..a522adf 100644 (file)
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
+441    common  epoll_pwait2                    sys_epoll_pwait2
index 32817c9..0f03ad2 100644 (file)
 438    n32     pidfd_getfd                     sys_pidfd_getfd
 439    n32     faccessat2                      sys_faccessat2
 440    n32     process_madvise                 sys_process_madvise
+441    n32     epoll_pwait2                    compat_sys_epoll_pwait2
index 9e4ea3c..9164969 100644 (file)
 438    n64     pidfd_getfd                     sys_pidfd_getfd
 439    n64     faccessat2                      sys_faccessat2
 440    n64     process_madvise                 sys_process_madvise
+441    n64     epoll_pwait2                    sys_epoll_pwait2
index 29f5f28..4bad0c4 100644 (file)
 438    o32     pidfd_getfd                     sys_pidfd_getfd
 439    o32     faccessat2                      sys_faccessat2
 440    o32     process_madvise                 sys_process_madvise
+441    o32     epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
index f375ea5..6bcc319 100644 (file)
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
+441    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
index ae73916..107bb43 100644 (file)
@@ -161,6 +161,7 @@ config PPC
        select DCACHE_WORD_ACCESS               if PPC64 && CPU_LITTLE_ENDIAN
        select DMA_OPS                          if PPC64
        select DMA_OPS_BYPASS                   if PPC64
+       select ARCH_HAS_DMA_MAP_DIRECT          if PPC64 && PPC_PSERIES
        select DYNAMIC_FTRACE                   if FUNCTION_TRACER
        select EDAC_ATOMIC_SCRUB
        select EDAC_SUPPORT
index a1c7441..111249f 100644 (file)
 #include <linux/pci.h>
 #include <asm/iommu.h>
 
+#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT
+#define can_map_direct(dev, addr) \
+       ((dev)->bus_dma_limit >= phys_to_dma((dev), (addr)))
+
+bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr)
+{
+       if (likely(!dev->bus_dma_limit))
+               return false;
+
+       return can_map_direct(dev, addr);
+}
+
+#define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset)
+
+bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle)
+{
+       if (likely(!dev->bus_dma_limit))
+               return false;
+
+       return is_direct_handle(dev, dma_handle);
+}
+
+bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg,
+                           int nents)
+{
+       struct scatterlist *s;
+       int i;
+
+       if (likely(!dev->bus_dma_limit))
+               return false;
+
+       for_each_sg(sg, s, nents, i) {
+               /* sg_phys() already accounts for s->offset */
+               if (!can_map_direct(dev, sg_phys(s) + s->length))
+                       return false;
+       }
+
+       return true;
+}
+
+bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg,
+                             int nents)
+{
+       struct scatterlist *s;
+       int i;
+
+       if (likely(!dev->bus_dma_limit))
+               return false;
+
+       for_each_sg(sg, s, nents, i) {
+               if (!is_direct_handle(dev, s->dma_address + s->length))
+                       return false;
+       }
+
+       return true;
+}
+#endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */
+
 /*
  * Generic iommu implementation
  */
@@ -90,8 +147,18 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
        struct iommu_table *tbl = get_iommu_table_base(dev);
 
        if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
-               dev->dma_ops_bypass = true;
-               dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+               /*
+                * dma_iommu_bypass_supported() sets dev->bus_dma_limit when
+                * there is a 1:1 mapping that is nevertheless limited;
+                * ibm,pmemory is one example.
+                */
+               dev->dma_ops_bypass = dev->bus_dma_limit == 0;
+               if (!dev->dma_ops_bypass)
+                       dev_warn(dev,
+                                "iommu: 64-bit OK but direct DMA is limited by %llx\n",
+                                dev->bus_dma_limit);
+               else
+                       dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
                return 1;
        }
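These arch_dma_*_direct() hooks let the generic DMA code mix bypass and
IOMMU mappings per buffer once a blanket dma_ops_bypass is no longer enough.
A simplified caller-side sketch (not the exact kernel/dma code;
iommu_map_page() is a placeholder for the TCE-backed path):

    static dma_addr_t sketch_map_page(struct device *dev, struct page *page,
                                      unsigned long offset, size_t size)
    {
            phys_addr_t phys = page_to_phys(page) + offset;

            if (dev->dma_ops_bypass ||
                arch_dma_map_page_direct(dev, phys + size))
                    return phys_to_dma(dev, phys);  /* 1:1 window */

            return iommu_map_page(dev, page, offset, size);  /* TCE table */
    }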
 
index 1275dae..f744eb5 100644 (file)
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
+441    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
index e419870..9fc5217 100644 (file)
@@ -839,7 +839,7 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
                        np, ret);
 }
 
-static u64 find_existing_ddw(struct device_node *pdn)
+static u64 find_existing_ddw(struct device_node *pdn, int *window_shift)
 {
        struct direct_window *window;
        const struct dynamic_dma_window_prop *direct64;
@@ -851,6 +851,7 @@ static u64 find_existing_ddw(struct device_node *pdn)
                if (window->device == pdn) {
                        direct64 = window->prop;
                        dma_addr = be64_to_cpu(direct64->dma_base);
+                       *window_shift = be32_to_cpu(direct64->window_shift);
                        break;
                }
        }
@@ -1111,11 +1112,12 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
  */
 static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 {
-       int len, ret;
+       int len = 0, ret;
+       int max_ram_len = order_base_2(ddw_memory_hotplug_max());
        struct ddw_query_response query;
        struct ddw_create_response create;
        int page_shift;
-       u64 dma_addr, max_addr;
+       u64 dma_addr;
        struct device_node *dn;
        u32 ddw_avail[DDW_APPLICABLE_SIZE];
        struct direct_window *window;
@@ -1123,10 +1125,15 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
        struct dynamic_dma_window_prop *ddwprop;
        struct failed_ddw_pdn *fpdn;
        bool default_win_removed = false;
+       bool pmem_present;
+
+       dn = of_find_node_by_type(NULL, "ibm,pmemory");
+       pmem_present = dn != NULL;
+       of_node_put(dn);
 
        mutex_lock(&direct_window_init_mutex);
 
-       dma_addr = find_existing_ddw(pdn);
+       dma_addr = find_existing_ddw(pdn, &len);
        if (dma_addr != 0)
                goto out_unlock;
 
@@ -1212,14 +1219,29 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
        }
        /* verify the window * number of ptes will map the partition */
        /* check largest block * page size > max memory hotplug addr */
-       max_addr = ddw_memory_hotplug_max();
-       if (query.largest_available_block < (max_addr >> page_shift)) {
-               dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
-                         "%llu-sized pages\n", max_addr,  query.largest_available_block,
-                         1ULL << page_shift);
+       /*
+        * The "ibm,pmemory" region can appear anywhere in the address space.
+        * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
+        * for the upper limit and fall back to max RAM otherwise, though
+        * this disables device::dma_ops_bypass.
+        */
+       len = max_ram_len;
+       if (pmem_present) {
+               if (query.largest_available_block >=
+                   (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
+                       len = MAX_PHYSMEM_BITS;
+               else
+                       dev_info(&dev->dev, "Skipping ibm,pmemory");
+       }
+
+       if (query.largest_available_block < (1ULL << (len - page_shift))) {
+               dev_dbg(&dev->dev,
+                       "can't map partition max 0x%llx with %llu %llu-sized pages\n",
+                       1ULL << len,
+                       query.largest_available_block,
+                       1ULL << page_shift);
                goto out_failed;
        }
-       len = order_base_2(max_addr);
        win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
        if (!win64) {
                dev_info(&dev->dev,
@@ -1299,6 +1321,15 @@ out_failed:
 
 out_unlock:
        mutex_unlock(&direct_window_init_mutex);
+
+       /*
+        * If we have persistent memory and the window size is only as big
+        * as RAM, then we failed to create a window to cover persistent
+        * memory and need to set the DMA limit.
+        */
+       if (pmem_present && dma_addr && (len == max_ram_len))
+               dev->dev.bus_dma_limit = dma_addr + (1ULL << len);
+
        return dma_addr;
 }
 
index 880c2b3..81b76d4 100644 (file)
@@ -15,6 +15,7 @@ config RISCV
        select ARCH_CLOCKSOURCE_INIT
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
+       select ARCH_STACKWALK
        select ARCH_HAS_BINFMT_FLAT
        select ARCH_HAS_DEBUG_VM_PGTABLE
        select ARCH_HAS_DEBUG_VIRTUAL if MMU
@@ -43,6 +44,7 @@ config RISCV
        select GENERIC_IOREMAP
        select GENERIC_IRQ_MULTI_HANDLER
        select GENERIC_IRQ_SHOW
+       select GENERIC_LIB_DEVMEM_IS_ALLOWED
        select GENERIC_PCI_IOMAP
        select GENERIC_PTDUMP if MMU
        select GENERIC_SCHED_CLOCK
@@ -68,6 +70,7 @@ config RISCV
        select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_GCC_PLUGINS
        select HAVE_GENERIC_VDSO if MMU && 64BIT
+       select HAVE_IRQ_TIME_ACCOUNTING
        select HAVE_PCI
        select HAVE_PERF_EVENTS
        select HAVE_PERF_REGS
index 8a55f61..3284d5c 100644 (file)
@@ -5,7 +5,7 @@ config SOC_SIFIVE
        select SERIAL_SIFIVE if TTY
        select SERIAL_SIFIVE_CONSOLE if TTY
        select CLK_SIFIVE
-       select CLK_SIFIVE_FU540_PRCI
+       select CLK_SIFIVE_PRCI
        select SIFIVE_PLIC
        help
          This enables support for SiFive SoC platform hardware.
index 0289a97..8c29e55 100644 (file)
@@ -96,5 +96,11 @@ $(BOOT_TARGETS): vmlinux
        $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
        @$(kecho) '  Kernel: $(boot)/$@ is ready'
 
+Image.%: Image
+       $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
 zinstall install:
        $(Q)$(MAKE) $(build)=$(boot) $@
+
+archclean:
+       $(Q)$(MAKE) $(clean)=$(boot)
index 574c10f..90e66ad 100644 (file)
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 Image
-Image.gz
+Image.*
 loader
 loader.lds
+loader.bin
index c59fca6..03404c8 100644 (file)
@@ -18,7 +18,7 @@ KCOV_INSTRUMENT := n
 
 OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 
-targets := Image loader
+targets := Image Image.* loader loader.o loader.lds loader.bin
 
 $(obj)/Image: vmlinux FORCE
        $(call if_changed,objcopy)
index 3a9971b..1595c5b 100644 (file)
@@ -9,5 +9,7 @@
 
 extern char _start[];
 extern char _start_kernel[];
+extern char __init_data_begin[], __init_data_end[];
+extern char __init_text_begin[], __init_text_end[];
 
 #endif /* __ASM_SECTIONS_H */
index d690b08..211eb82 100644 (file)
@@ -15,11 +15,15 @@ int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_rw_nx(unsigned long addr, int numpages);
+void protect_kernel_text_data(void);
 #else
 static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
+static inline void protect_kernel_text_data(void) {}
+static inline int set_memory_rw_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
 int set_direct_map_invalid_noflush(struct page *page);
diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h
new file mode 100644 (file)
index 0000000..470a65c
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_STACKTRACE_H
+#define _ASM_RISCV_STACKTRACE_H
+
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+
+struct stackframe {
+       unsigned long fp;
+       unsigned long ra;
+};
+
+extern void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
+                                   bool (*fn)(void *, unsigned long), void *arg);
+
+#endif /* _ASM_RISCV_STACKTRACE_H */
index 924af13..5477e7e 100644 (file)
 #define __HAVE_ARCH_MEMSET
 extern asmlinkage void *memset(void *, int, size_t);
 extern asmlinkage void *__memset(void *, int, size_t);
-
 #define __HAVE_ARCH_MEMCPY
 extern asmlinkage void *memcpy(void *, const void *, size_t);
 extern asmlinkage void *__memcpy(void *, const void *, size_t);
-
+#define __HAVE_ARCH_MEMMOVE
+extern asmlinkage void *memmove(void *, const void *, size_t);
+extern asmlinkage void *__memmove(void *, const void *, size_t);
 /* For those files which don't want to check by kasan. */
 #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
-
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
 #define memset(s, c, n) __memset(s, c, n)
-
+#define memmove(dst, src, len) __memmove(dst, src, len)
 #endif
 #endif /* _ASM_RISCV_STRING_H */
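The memmove additions follow the existing memcpy/memset pattern: KASAN
interposes on the plain names, and files built without instrumentation are
routed to the uninstrumented variants. A sketch of the effect in a file
where __SANITIZE_ADDRESS__ is undefined (the function is hypothetical):

    static void early_copy(void *dst, const void *src, size_t len)
    {
            memmove(dst, src, len);  /* expands to __memmove(), no checks */
    }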
index fa896c5..f6caf4d 100644 (file)
@@ -56,5 +56,3 @@ obj-$(CONFIG_KGDB)            += kgdb.o
 obj-$(CONFIG_JUMP_LABEL)       += jump_label.o
 
 obj-$(CONFIG_EFI)              += efi.o
-
-clean:
index db20344..b79ffa3 100644 (file)
@@ -11,6 +11,8 @@
 #include <asm/thread_info.h>
 #include <asm/ptrace.h>
 
+void asm_offsets(void);
+
 void asm_offsets(void)
 {
        OFFSET(TASK_THREAD_RA, task_struct, thread.ra);
index 7e84979..16e9941 100644 (file)
@@ -182,7 +182,6 @@ setup_trap_vector:
 
 END(_start)
 
-       __INIT
 ENTRY(_start_kernel)
        /* Mask all interrupts */
        csrw CSR_IE, zero
index cf19019..0bb1854 100644 (file)
@@ -4,11 +4,7 @@
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
 
-/* Kernel callchain */
-struct stackframe {
-       unsigned long fp;
-       unsigned long ra;
-};
+#include <asm/stacktrace.h>
 
 /*
  * Get the return address for a single stackframe and return a pointer to the
@@ -74,13 +70,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
                fp = user_backtrace(entry, fp, 0);
 }
 
-bool fill_callchain(unsigned long pc, void *entry)
+static bool fill_callchain(void *entry, unsigned long pc)
 {
        return perf_callchain_store(entry, pc);
 }
 
-void notrace walk_stackframe(struct task_struct *task,
-       struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg);
 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
                           struct pt_regs *regs)
 {
index 450492e..5ab1c7e 100644 (file)
@@ -11,5 +11,7 @@
  */
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(__memset);
 EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(__memmove);
index 117f321..1d85e9b 100644 (file)
@@ -4,6 +4,8 @@
  *  Chen Liqin <liqin.chen@sunplusct.com>
  *  Lennox Wu <lennox.wu@sunplusct.com>
  * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2020 FORTH-ICS/CARV
+ *  Nick Kossifidis <mick@ics.forth.gr>
  */
 
 #include <linux/init.h>
@@ -22,6 +24,7 @@
 #include <asm/cpu_ops.h>
 #include <asm/early_ioremap.h>
 #include <asm/setup.h>
+#include <asm/set_memory.h>
 #include <asm/sections.h>
 #include <asm/sbi.h>
 #include <asm/tlbflush.h>
@@ -51,6 +54,163 @@ atomic_t hart_lottery __section(".sdata");
 unsigned long boot_cpu_hartid;
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 
+/*
+ * Place kernel memory regions on the resource tree so that
+ * kexec-tools can retrieve them from /proc/iomem. While there
+ * also add "System RAM" regions for compatibility with other
+ * archs, and the rest of the known regions for completeness.
+ */
+static struct resource code_res = { .name = "Kernel code", };
+static struct resource data_res = { .name = "Kernel data", };
+static struct resource rodata_res = { .name = "Kernel rodata", };
+static struct resource bss_res = { .name = "Kernel bss", };
+
+static int __init add_resource(struct resource *parent,
+                               struct resource *res)
+{
+       int ret = 0;
+
+       ret = insert_resource(parent, res);
+       if (ret < 0) {
+               pr_err("Failed to add a %s resource at %llx\n",
+                       res->name, (unsigned long long) res->start);
+               return ret;
+       }
+
+       return 1;
+}
+
+static int __init add_kernel_resources(struct resource *res)
+{
+       int ret = 0;
+
+       /*
+        * The memory region of the kernel image is contiguous and
+        * was reserved in setup_bootmem; find it here and register
+        * it as a resource, then register the various segments of
+        * the image as child nodes.
+        */
+       if (!(res->start <= code_res.start && res->end >= data_res.end))
+               return 0;
+
+       res->name = "Kernel image";
+       res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+       /*
+        * We removed part of this region in setup_bootmem, so we
+        * need to expand the resource for the bss to fit in.
+        */
+       res->end = bss_res.end;
+
+       ret = add_resource(&iomem_resource, res);
+       if (ret < 0)
+               return ret;
+
+       ret = add_resource(res, &code_res);
+       if (ret < 0)
+               return ret;
+
+       ret = add_resource(res, &rodata_res);
+       if (ret < 0)
+               return ret;
+
+       ret = add_resource(res, &data_res);
+       if (ret < 0)
+               return ret;
+
+       ret = add_resource(res, &bss_res);
+
+       return ret;
+}
+
+static void __init init_resources(void)
+{
+       struct memblock_region *region = NULL;
+       struct resource *res = NULL;
+       int ret = 0;
+
+       code_res.start = __pa_symbol(_text);
+       code_res.end = __pa_symbol(_etext) - 1;
+       code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+       rodata_res.start = __pa_symbol(__start_rodata);
+       rodata_res.end = __pa_symbol(__end_rodata) - 1;
+       rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+       data_res.start = __pa_symbol(_data);
+       data_res.end = __pa_symbol(_edata) - 1;
+       data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+       bss_res.start = __pa_symbol(__bss_start);
+       bss_res.end = __pa_symbol(__bss_stop) - 1;
+       bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+       /*
+        * Start by adding the reserved regions, if they overlap
+        * with /memory regions, insert_resource later on will take
+        * care of it.
+        */
+       for_each_reserved_mem_region(region) {
+               res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
+               if (!res)
+                       panic("%s: Failed to allocate %zu bytes\n", __func__,
+                             sizeof(struct resource));
+
+               res->name = "Reserved";
+               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region));
+               res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1;
+
+               ret = add_kernel_resources(res);
+               if (ret < 0)
+                       goto error;
+               else if (ret)
+                       continue;
+
+               /*
+                * Ignore any other reserved regions within
+                * system memory.
+                */
+               if (memblock_is_memory(res->start))
+                       continue;
+
+               ret = add_resource(&iomem_resource, res);
+               if (ret < 0)
+                       goto error;
+       }
+
+       /* Add /memory regions to the resource tree */
+       for_each_mem_region(region) {
+               res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
+               if (!res)
+                       panic("%s: Failed to allocate %zu bytes\n", __func__,
+                             sizeof(struct resource));
+
+               if (unlikely(memblock_is_nomap(region))) {
+                       res->name = "Reserved";
+                       res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               } else {
+                       res->name = "System RAM";
+                       res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+               }
+
+               res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+               res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+
+               ret = add_resource(&iomem_resource, res);
+               if (ret < 0)
+                       goto error;
+       }
+
+       return;
+
+ error:
+       memblock_free((phys_addr_t) res, sizeof(struct resource));
+       /* Better an empty resource tree than an inconsistent one */
+       release_child_resources(&iomem_resource);
+}
+
+
 static void __init parse_dtb(void)
 {
        /* Early scan of device tree from init memory */
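The resulting tree is what kexec-tools reads back from /proc/iomem. A
hypothetical layout after this change (addresses made up for illustration):

    80000000-bfffffff : System RAM
      80200000-80ffffff : Kernel image
        80200000-805fffff : Kernel code
        80600000-807fffff : Kernel rodata
        80800000-80bfffff : Kernel data
        80c00000-80ffffff : Kernel bss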
@@ -81,6 +241,7 @@ void __init setup_arch(char **cmdline_p)
        efi_init();
        setup_bootmem();
        paging_init();
+       init_resources();
 #if IS_ENABLED(CONFIG_BUILTIN_DTB)
        unflatten_and_copy_device_tree();
 #else
@@ -90,6 +251,11 @@ void __init setup_arch(char **cmdline_p)
                pr_err("No DTB found in kernel mappings\n");
 #endif
 
+       if (IS_ENABLED(CONFIG_RISCV_SBI))
+               sbi_init();
+
+       if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+               protect_kernel_text_data();
 #ifdef CONFIG_SWIOTLB
        swiotlb_init(1);
 #endif
@@ -98,10 +264,6 @@ void __init setup_arch(char **cmdline_p)
        kasan_init();
 #endif
 
-#if IS_ENABLED(CONFIG_RISCV_SBI)
-       sbi_init();
-#endif
-
 #ifdef CONFIG_SMP
        setup_smp();
 #endif
@@ -123,3 +285,12 @@ static int __init topology_init(void)
        return 0;
 }
 subsys_initcall(topology_init);
+
+void free_initmem(void)
+{
+       unsigned long init_begin = (unsigned long)__init_begin;
+       unsigned long init_end = (unsigned long)__init_end;
+
+       set_memory_rw_nx(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
+       free_initmem_default(POISON_FREE_INITMEM);
+}
index 5953429..48b870a 100644 (file)
 #include <linux/stacktrace.h>
 #include <linux/ftrace.h>
 
+#include <asm/stacktrace.h>
+
 register unsigned long sp_in_global __asm__("sp");
 
 #ifdef CONFIG_FRAME_POINTER
 
-struct stackframe {
-       unsigned long fp;
-       unsigned long ra;
-};
-
 void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
-                            bool (*fn)(unsigned long, void *), void *arg)
+                            bool (*fn)(void *, unsigned long), void *arg)
 {
        unsigned long fp, sp, pc;
 
@@ -46,7 +43,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
                unsigned long low, high;
                struct stackframe *frame;
 
-               if (unlikely(!__kernel_text_address(pc) || fn(pc, arg)))
+               if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
                        break;
 
                /* Validate frame pointer */
@@ -66,7 +63,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 #else /* !CONFIG_FRAME_POINTER */
 
 void notrace walk_stackframe(struct task_struct *task,
-       struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg)
+       struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg)
 {
        unsigned long sp, pc;
        unsigned long *ksp;
@@ -88,7 +85,7 @@ void notrace walk_stackframe(struct task_struct *task,
 
        ksp = (unsigned long *)sp;
        while (!kstack_end(ksp)) {
-               if (__kernel_text_address(pc) && unlikely(fn(pc, arg)))
+               if (__kernel_text_address(pc) && unlikely(!fn(arg, pc)))
                        break;
                pc = (*ksp++) - 0x4;
        }
@@ -96,13 +93,12 @@ void notrace walk_stackframe(struct task_struct *task,
 
 #endif /* CONFIG_FRAME_POINTER */
 
-
-static bool print_trace_address(unsigned long pc, void *arg)
+static bool print_trace_address(void *arg, unsigned long pc)
 {
        const char *loglvl = arg;
 
        print_ip_sym(loglvl, pc);
-       return false;
+       return true;
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
@@ -111,14 +107,14 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
        walk_stackframe(task, NULL, print_trace_address, (void *)loglvl);
 }
 
-static bool save_wchan(unsigned long pc, void *arg)
+static bool save_wchan(void *arg, unsigned long pc)
 {
        if (!in_sched_functions(pc)) {
                unsigned long *p = arg;
                *p = pc;
-               return true;
+               return false;
        }
-       return false;
+       return true;
 }
 
 unsigned long get_wchan(struct task_struct *task)
@@ -130,42 +126,12 @@ unsigned long get_wchan(struct task_struct *task)
        return pc;
 }
 
-
 #ifdef CONFIG_STACKTRACE
 
-static bool __save_trace(unsigned long pc, void *arg, bool nosched)
-{
-       struct stack_trace *trace = arg;
-
-       if (unlikely(nosched && in_sched_functions(pc)))
-               return false;
-       if (unlikely(trace->skip > 0)) {
-               trace->skip--;
-               return false;
-       }
-
-       trace->entries[trace->nr_entries++] = pc;
-       return (trace->nr_entries >= trace->max_entries);
-}
-
-static bool save_trace(unsigned long pc, void *arg)
-{
-       return __save_trace(pc, arg, false);
-}
-
-/*
- * Save stack-backtrace addresses into a stack_trace buffer.
- */
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
-       walk_stackframe(tsk, NULL, save_trace, trace);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-
-void save_stack_trace(struct stack_trace *trace)
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+                    struct task_struct *task, struct pt_regs *regs)
 {
-       save_stack_trace_tsk(NULL, trace);
+       walk_stackframe(task, regs, consume_entry, cookie);
 }
-EXPORT_SYMBOL_GPL(save_stack_trace);
 
 #endif /* CONFIG_STACKTRACE */
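Note the inverted callback convention that comes with ARCH_STACKWALK: a
consume_entry function now returns true to continue the walk and false to
stop it. A minimal caller-side sketch (count_frames and backtrace_depth are
hypothetical):

    static bool count_frames(void *cookie, unsigned long pc)
    {
            unsigned int *count = cookie;

            (*count)++;
            return *count < 16;     /* returning false stops the walk */
    }

    static unsigned int backtrace_depth(void)
    {
            unsigned int count = 0;

            arch_stack_walk(count_frames, &count, current, NULL);
            return count;
    }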
index 3ffbd6c..de03cb2 100644 (file)
@@ -29,8 +29,30 @@ SECTIONS
        HEAD_TEXT_SECTION
        . = ALIGN(PAGE_SIZE);
 
+       .text : {
+               _text = .;
+               _stext = .;
+               TEXT_TEXT
+               SCHED_TEXT
+               CPUIDLE_TEXT
+               LOCK_TEXT
+               KPROBES_TEXT
+               ENTRY_TEXT
+               IRQENTRY_TEXT
+               SOFTIRQENTRY_TEXT
+               *(.fixup)
+               _etext = .;
+       }
+
+       . = ALIGN(SECTION_ALIGN);
        __init_begin = .;
-       INIT_TEXT_SECTION(PAGE_SIZE)
+       __init_text_begin = .;
+       .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) ALIGN(SECTION_ALIGN) { \
+               _sinittext = .;                                         \
+               INIT_TEXT                                               \
+               _einittext = .;                                         \
+       }
+
        . = ALIGN(8);
        __soc_early_init_table : {
                __soc_early_init_table_start = .;
@@ -47,35 +69,28 @@ SECTIONS
        {
                EXIT_TEXT
        }
+
+       __init_text_end = .;
+       . = ALIGN(SECTION_ALIGN);
+#ifdef CONFIG_EFI
+       . = ALIGN(PECOFF_SECTION_ALIGNMENT);
+       __pecoff_text_end = .;
+#endif
+       /* Start of init data section */
+       __init_data_begin = .;
+       INIT_DATA_SECTION(16)
        .exit.data :
        {
                EXIT_DATA
        }
        PERCPU_SECTION(L1_CACHE_BYTES)
-       __init_end = .;
 
-       . = ALIGN(SECTION_ALIGN);
-       .text : {
-               _text = .;
-               _stext = .;
-               TEXT_TEXT
-               SCHED_TEXT
-               CPUIDLE_TEXT
-               LOCK_TEXT
-               KPROBES_TEXT
-               ENTRY_TEXT
-               IRQENTRY_TEXT
-               SOFTIRQENTRY_TEXT
-               *(.fixup)
-               _etext = .;
+       .rel.dyn : {
+               *(.rel.dyn*)
        }
 
-#ifdef CONFIG_EFI
-       . = ALIGN(PECOFF_SECTION_ALIGNMENT);
-       __pecoff_text_end = .;
-#endif
-
-       INIT_DATA_SECTION(16)
+       __init_data_end = .;
+       __init_end = .;
 
        /* Start of data section */
        _sdata = .;
@@ -105,10 +120,6 @@ SECTIONS
 
        BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
 
-       .rel.dyn : {
-               *(.rel.dyn*)
-       }
-
 #ifdef CONFIG_EFI
        . = ALIGN(PECOFF_SECTION_ALIGNMENT);
        __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
index 47e7a82..ac6171e 100644 (file)
@@ -2,5 +2,6 @@
 lib-y                  += delay.o
 lib-y                  += memcpy.o
 lib-y                  += memset.o
+lib-y                  += memmove.o
 lib-$(CONFIG_MMU)      += uaccess.o
 lib-$(CONFIG_64BIT)    += tishift.o
diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
new file mode 100644 (file)
index 0000000..07d1d21
--- /dev/null
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ENTRY(__memmove)
+WEAK(memmove)
+        move    t0, a0
+        move    t1, a1
+
+        beq     a0, a1, exit_memcpy
+        beqz    a2, exit_memcpy
+        srli    t2, a2, 0x2
+
+        slt     t3, a0, a1
+        beqz    t3, do_reverse
+
+        andi    a2, a2, 0x3
+        li      t4, 1
+        beqz    t2, byte_copy
+
+word_copy:
+        lw      t3, 0(a1)
+        addi    t2, t2, -1
+        addi    a1, a1, 4
+        sw      t3, 0(a0)
+        addi    a0, a0, 4
+        bnez    t2, word_copy
+        beqz    a2, exit_memcpy
+        j       byte_copy
+
+do_reverse:
+        add     a0, a0, a2
+        add     a1, a1, a2
+        andi    a2, a2, 0x3
+        li      t4, -1
+        beqz    t2, reverse_byte_copy
+
+reverse_word_copy:
+        addi    a1, a1, -4
+        addi    t2, t2, -1
+        lw      t3, 0(a1)
+        addi    a0, a0, -4
+        sw      t3, 0(a0)
+        bnez    t2, reverse_word_copy
+        beqz    a2, exit_memcpy
+
+reverse_byte_copy:
+        addi    a0, a0, -1
+        addi    a1, a1, -1
+
+byte_copy:
+        lb      t3, 0(a1)
+        addi    a2, a2, -1
+        sb      t3, 0(a0)
+        add     a1, a1, t4
+        add     a0, a0, t4
+        bnez    a2, byte_copy
+
+exit_memcpy:
+        move    a0, t0
+        move    a1, t1
+        ret
+END(__memmove)
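A rough C rendering of the strategy above, for readability only (a sketch,
not the kernel implementation; like the assembly it does word copies without
alignment checks, then finishes with bytes, copying backward when the
destination sits above the source):

    void *memmove_sketch(void *dst, const void *src, size_t n)
    {
            unsigned char *d = dst;
            const unsigned char *s = src;

            if (d == s || n == 0)
                    return dst;

            if (d < s) {                    /* copy forward, words first */
                    for (; n >= 4; n -= 4, d += 4, s += 4)
                            *(unsigned int *)d = *(const unsigned int *)s;
                    while (n--)
                            *d++ = *s++;
            } else {                        /* copy backward for overlap */
                    d += n;
                    s += n;
                    for (; n >= 4; n -= 4) {
                            d -= 4;
                            s -= 4;
                            *(unsigned int *)d = *(const unsigned int *)s;
                    }
                    while (n--)
                            *--d = *--s;
            }
            return dst;
    }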
index 8e577f1..13ba533 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
 #include <linux/set_memory.h>
+#include <linux/dma-map-ops.h>
 
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
@@ -41,13 +42,14 @@ struct pt_alloc_ops {
 #endif
 };
 
+static phys_addr_t dma32_phys_limit __ro_after_init;
+
 static void __init zone_sizes_init(void)
 {
        unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
 
 #ifdef CONFIG_ZONE_DMA32
-       max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G,
-                       (unsigned long) PFN_PHYS(max_low_pfn)));
+       max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
 #endif
        max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
@@ -181,6 +183,7 @@ void __init setup_bootmem(void)
 
        max_pfn = PFN_DOWN(memblock_end_of_DRAM());
        max_low_pfn = max_pfn;
+       dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
        set_max_mapnr(max_low_pfn);
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -194,6 +197,7 @@ void __init setup_bootmem(void)
        memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
 
        early_init_fdt_scan_reserved_mem();
+       dma_contiguous_reserve(dma32_phys_limit);
        memblock_allow_resize();
        memblock_dump_all();
 }
@@ -618,48 +622,33 @@ static inline void setup_vm_final(void)
 #endif /* CONFIG_MMU */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void mark_rodata_ro(void)
+void protect_kernel_text_data(void)
 {
-       unsigned long text_start = (unsigned long)_text;
-       unsigned long text_end = (unsigned long)_etext;
+       unsigned long text_start = (unsigned long)_start;
+       unsigned long init_text_start = (unsigned long)__init_text_begin;
+       unsigned long init_data_start = (unsigned long)__init_data_begin;
        unsigned long rodata_start = (unsigned long)__start_rodata;
        unsigned long data_start = (unsigned long)_data;
        unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
 
-       set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
-       set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+       set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT);
+       set_memory_ro(init_text_start, (init_data_start - init_text_start) >> PAGE_SHIFT);
+       set_memory_nx(init_data_start, (rodata_start - init_data_start) >> PAGE_SHIFT);
+       /* rodata section is marked readonly in mark_rodata_ro */
        set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
        set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
-
-       debug_checkwx();
 }
-#endif
 
-static void __init resource_init(void)
+void mark_rodata_ro(void)
 {
-       struct memblock_region *region;
-
-       for_each_mem_region(region) {
-               struct resource *res;
-
-               res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
-               if (!res)
-                       panic("%s: Failed to allocate %zu bytes\n", __func__,
-                             sizeof(struct resource));
+       unsigned long rodata_start = (unsigned long)__start_rodata;
+       unsigned long data_start = (unsigned long)_data;
 
-               if (memblock_is_nomap(region)) {
-                       res->name = "reserved";
-                       res->flags = IORESOURCE_MEM;
-               } else {
-                       res->name = "System RAM";
-                       res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
-               }
-               res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
-               res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+       set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
 
-               request_resource(&iomem_resource, res);
-       }
+       debug_checkwx();
 }
+#endif
 
 void __init paging_init(void)
 {
@@ -667,7 +656,6 @@ void __init paging_init(void)
        sparse_init();
        setup_zero_page();
        zone_sizes_init();
-       resource_init();
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
index 87ba5a6..5e49e4b 100644 (file)
@@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
        return ret;
 }
 
+int set_memory_rw_nx(unsigned long addr, int numpages)
+{
+       return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE),
+                           __pgprot(_PAGE_EXEC));
+}
+
 int set_memory_ro(unsigned long addr, int numpages)
 {
        return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
index f795eeb..e84bdd1 100644 (file)
@@ -150,6 +150,7 @@ config S390
        select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_GCC_PLUGINS
        select HAVE_GENERIC_VDSO
+       select HAVE_IRQ_EXIT_ON_IRQ_STACK
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_LZ4
index b11e810..faccb33 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #undef CONFIG_KASAN
+#undef CONFIG_KASAN_GENERIC
 #include "../lib/string.c"
 
 int strncmp(const char *cs, const char *ct, size_t count)
index 4a08379..21a8fe1 100644 (file)
 #ifndef _S390_DELAY_H
 #define _S390_DELAY_H
 
-void udelay_enable(void);
-void __ndelay(unsigned long long nsecs);
-void __udelay(unsigned long long usecs);
-void udelay_simple(unsigned long long usecs);
+void __ndelay(unsigned long nsecs);
+void __udelay(unsigned long usecs);
 void __delay(unsigned long loops);
 
-#define ndelay(n) __ndelay((unsigned long long) (n))
-#define udelay(n) __udelay((unsigned long long) (n))
-#define mdelay(n) __udelay((unsigned long long) (n) * 1000)
+#define ndelay(n) __ndelay((unsigned long)(n))
+#define udelay(n) __udelay((unsigned long)(n))
+#define mdelay(n) __udelay((unsigned long)(n) * 1000)
 
 #endif /* defined(_S390_DELAY_H) */
index 463c24e..74f9a03 100644 (file)
@@ -459,6 +459,7 @@ struct kvm_vcpu_stat {
        u64 diagnose_308;
        u64 diagnose_500;
        u64 diagnose_other;
+       u64 pfault_sync;
 };
 
 #define PGM_OPERATION                  0x01
index 6b7269f..2058a43 100644 (file)
 
 #define CIF_NOHZ_DELAY         2       /* delay HZ disable for a tick */
 #define CIF_FPU                        3       /* restore FPU registers */
-#define CIF_IGNORE_IRQ         4       /* ignore interrupt (for udelay) */
 #define CIF_ENABLED_WAIT       5       /* in enabled wait state */
 #define CIF_MCCK_GUEST         6       /* machine check happening in guest */
 #define CIF_DEDICATED_CPU      7       /* this CPU is dedicated */
 
 #define _CIF_NOHZ_DELAY                BIT(CIF_NOHZ_DELAY)
 #define _CIF_FPU               BIT(CIF_FPU)
-#define _CIF_IGNORE_IRQ                BIT(CIF_IGNORE_IRQ)
 #define _CIF_ENABLED_WAIT      BIT(CIF_ENABLED_WAIT)
 #define _CIF_MCCK_GUEST                BIT(CIF_MCCK_GUEST)
 #define _CIF_DEDICATED_CPU     BIT(CIF_DEDICATED_CPU)
@@ -293,11 +291,6 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
 }
 
 /*
- * Function to stop a processor until the next interrupt occurs
- */
-void enabled_wait(void);
-
-/*
  * Function to drop a processor into disabled wait state
  */
 static __always_inline void __noreturn disabled_wait(void)
index 1f46592..f1ba197 100644 (file)
@@ -414,6 +414,7 @@ ENTRY(system_call)
        mvc     __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
        mvc     __PT_INT_CODE(4,%r11),__LC_SVC_ILC
        stg     %r14,__PT_FLAGS(%r11)
+       xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
        ENABLE_INTS
 .Lsysc_do_svc:
        # clear user controlled register to prevent speculative use
@@ -430,7 +431,6 @@ ENTRY(system_call)
        jnl     .Lsysc_nr_ok
        slag    %r8,%r1,3
 .Lsysc_nr_ok:
-       xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
        stg     %r2,__PT_ORIG_GPR2(%r11)
        stg     %r7,STACK_FRAME_OVERHEAD(%r15)
        lg      %r9,0(%r8,%r10)                 # get system call add.
@@ -699,8 +699,8 @@ ENTRY(pgm_check_handler)
        mvc     __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
        mvc     __THREAD_per_cause(2,%r14),__LC_PER_CODE
        mvc     __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-6:     RESTORE_SM_CLEAR_PER
-       xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+6:     xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+       RESTORE_SM_CLEAR_PER
        larl    %r1,pgm_check_table
        llgh    %r10,__PT_INT_CODE+2(%r11)
        nill    %r10,0x007f
@@ -731,8 +731,8 @@ ENTRY(pgm_check_handler)
 # PER event in supervisor state, must be kprobes
 #
 .Lpgm_kprobe:
-       RESTORE_SM_CLEAR_PER
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+       RESTORE_SM_CLEAR_PER
        lgr     %r2,%r11                # pass pointer to pt_regs
        brasl   %r14,do_per_trap
        j       .Lpgm_return
@@ -778,10 +778,8 @@ ENTRY(io_int_handler)
 .Lio_skip_asce:
        mvc     __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
        xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
-       TSTMSK  __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
-       jo      .Lio_restore
-       TRACE_IRQS_OFF
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+       TRACE_IRQS_OFF
 .Lio_loop:
        lgr     %r2,%r11                # pass pointer to pt_regs
        lghi    %r3,IO_INTERRUPT
@@ -966,10 +964,8 @@ ENTRY(ext_int_handler)
        mvc     __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
        mvc     __PT_INT_PARM_LONG(8,%r11),0(%r1)
        xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
-       TSTMSK  __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
-       jo      .Lio_restore
-       TRACE_IRQS_OFF
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+       TRACE_IRQS_OFF
        lgr     %r2,%r11                # pass pointer to pt_regs
        lghi    %r3,EXT_INTERRUPT
        brasl   %r14,do_IRQ
index 2b85096..a5d4d80 100644 (file)
@@ -9,7 +9,6 @@
 
 #include <linux/kernel.h>
 #include <linux/kernel_stat.h>
-#include <linux/kprobes.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
 #include <linux/cpu.h>
 
 static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 
-void enabled_wait(void)
+void arch_cpu_idle(void)
 {
        struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
        unsigned long long idle_time;
-       unsigned long psw_mask, flags;
-
+       unsigned long psw_mask;
 
        /* Wait for external, I/O or machine check interrupt. */
        psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
        clear_cpu_flag(CIF_NOHZ_DELAY);
 
-       raw_local_irq_save(flags);
-       /* Call the assembler magic in entry.S */
+       /* psw_idle() returns with interrupts disabled. */
        psw_idle(idle, psw_mask);
-       raw_local_irq_restore(flags);
 
        /* Account time spent with enabled wait psw loaded as idle time. */
        raw_write_seqcount_begin(&idle->seqcount);
@@ -46,8 +42,8 @@ void enabled_wait(void)
        idle->idle_count++;
        account_idle_time(cputime_to_nsecs(idle_time));
        raw_write_seqcount_end(&idle->seqcount);
+       raw_local_irq_enable();
 }
-NOKPROBE_SYMBOL(enabled_wait);
 
 static ssize_t show_idle_count(struct device *dev,
                                struct device_attribute *attr, char *buf)
@@ -120,12 +116,6 @@ void arch_cpu_idle_enter(void)
 {
 }
 
-void arch_cpu_idle(void)
-{
-       enabled_wait();
-       raw_local_irq_enable();
-}
-
 void arch_cpu_idle_exit(void)
 {
 }
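
With enabled_wait() folded into arch_cpu_idle(), the re-enable of interrupts
now happens inside the arch hook itself. A sketch of the contract this
implements (as of this series the generic idle loop enters with IRQs off and
expects the arch to wait and re-enable them; arch_wait_for_interrupt() below
is a hypothetical stand-in for the wait-PSW/wfi/hlt primitive):

void arch_cpu_idle(void)
{
        arch_wait_for_interrupt();      /* hypothetical: sleep until IRQ */
        raw_local_irq_enable();         /* return with IRQs enabled */
}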
index 98b3aca..7a21eca 100644 (file)
@@ -1512,7 +1512,7 @@ static void diag308_dump(void *dump_block)
        while (1) {
                if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302)
                        break;
-               udelay_simple(USEC_PER_SEC);
+               udelay(USEC_PER_SEC);
        }
 }
 
index 1f16a03..1fbed91 100644 (file)
@@ -335,7 +335,6 @@ int __init arch_early_irq_init(void)
        if (!stack)
                panic("Couldn't allocate async stack");
        S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
-       udelay_enable();
        return 0;
 }
 
index 28c1680..d443423 100644 (file)
 438  common    pidfd_getfd             sys_pidfd_getfd                 sys_pidfd_getfd
 439  common    faccessat2              sys_faccessat2                  sys_faccessat2
 440  common    process_madvise         sys_process_madvise             sys_process_madvise
+441  common    epoll_pwait2            sys_epoll_pwait2                compat_sys_epoll_pwait2
index 394a5f5..3765c42 100644 (file)
@@ -184,7 +184,7 @@ static int __import_wp_info(struct kvm_vcpu *vcpu,
        if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
                return -EINVAL;
 
-       wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
+       wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL_ACCOUNT);
        if (!wp_info->old_data)
                return -ENOMEM;
        /* try to backup the original value */
@@ -234,7 +234,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
        if (nr_wp > 0) {
                wp_info = kmalloc_array(nr_wp,
                                        sizeof(*wp_info),
-                                       GFP_KERNEL);
+                                       GFP_KERNEL_ACCOUNT);
                if (!wp_info) {
                        ret = -ENOMEM;
                        goto error;
@@ -243,7 +243,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
        if (nr_bp > 0) {
                bp_info = kmalloc_array(nr_bp,
                                        sizeof(*bp_info),
-                                       GFP_KERNEL);
+                                       GFP_KERNEL_ACCOUNT);
                if (!bp_info) {
                        ret = -ENOMEM;
                        goto error;
@@ -349,7 +349,7 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
                if (!wp_info || !wp_info->old_data || wp_info->len <= 0)
                        continue;
 
-               temp = kmalloc(wp_info->len, GFP_KERNEL);
+               temp = kmalloc(wp_info->len, GFP_KERNEL_ACCOUNT);
                if (!temp)
                        continue;
 
index e7a7c49..72b25b7 100644 (file)
@@ -398,7 +398,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
        if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK))
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       sctns = (void *)get_zeroed_page(GFP_KERNEL);
+       sctns = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
        if (!sctns)
                return -ENOMEM;
 
index 2f17729..e3183bd 100644 (file)
@@ -1792,7 +1792,7 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                goto out;
        }
 gisa_out:
-       tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+       tmp_inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
        if (tmp_inti) {
                tmp_inti->type = KVM_S390_INT_IO(1, 0, 0, 0);
                tmp_inti->io.io_int_word = isc_to_int_word(isc);
@@ -2015,7 +2015,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
        struct kvm_s390_interrupt_info *inti;
        int rc;
 
-       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+       inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
        if (!inti)
                return -ENOMEM;
 
@@ -2414,7 +2414,7 @@ static int enqueue_floating_irq(struct kvm_device *dev,
                return -EINVAL;
 
        while (len >= sizeof(struct kvm_s390_irq)) {
-               inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+               inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT);
                if (!inti)
                        return -ENOMEM;
 
@@ -2462,7 +2462,7 @@ static int register_io_adapter(struct kvm_device *dev,
        if (dev->kvm->arch.adapters[adapter_info.id] != NULL)
                return -EINVAL;
 
-       adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+       adapter = kzalloc(sizeof(*adapter), GFP_KERNEL_ACCOUNT);
        if (!adapter)
                return -ENOMEM;
 
@@ -3290,7 +3290,7 @@ int kvm_s390_gib_init(u8 nisc)
                goto out;
        }
 
-       gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+       gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
        if (!gib) {
                rc = -ENOMEM;
                goto out;
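
The GFP_KERNEL -> GFP_KERNEL_ACCOUNT conversions running through these KVM
files charge guest-lifetime allocations to the memcg of the process that owns
the VM. A minimal sketch of the idiom (struct vm_private is hypothetical):

#include <linux/slab.h>

struct vm_private {             /* hypothetical per-VM state */
        u64 handle;
};

static struct vm_private *vm_private_alloc(void)
{
        /*
         * GFP_KERNEL_ACCOUNT is GFP_KERNEL | __GFP_ACCOUNT: the memory
         * is charged to the current task's kmem cgroup, so a runaway
         * guest cannot pile up unaccounted kernel allocations.
         */
        return kzalloc(sizeof(struct vm_private), GFP_KERNEL_ACCOUNT);
}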
index 425d3d7..dbafd05 100644 (file)
@@ -60,6 +60,7 @@
 struct kvm_stats_debugfs_item debugfs_entries[] = {
        VCPU_STAT("userspace_handled", exit_userspace),
        VCPU_STAT("exit_null", exit_null),
+       VCPU_STAT("pfault_sync", pfault_sync),
        VCPU_STAT("exit_validity", exit_validity),
        VCPU_STAT("exit_stop_request", exit_stop_request),
        VCPU_STAT("exit_external_request", exit_external_request),
@@ -1254,7 +1255,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
                ret = -EBUSY;
                goto out;
        }
-       proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+       proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
@@ -1416,7 +1417,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;
 
-       proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+       proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
@@ -1444,7 +1445,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;
 
-       mach = kzalloc(sizeof(*mach), GFP_KERNEL);
+       mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
@@ -1812,7 +1813,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;
 
-       keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
+       keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
        if (!keys)
                return -ENOMEM;
 
@@ -1857,7 +1858,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;
 
-       keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
+       keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
        if (!keys)
                return -ENOMEM;
 
@@ -2625,7 +2626,7 @@ static void sca_dispose(struct kvm *kvm)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-       gfp_t alloc_flags = GFP_KERNEL;
+       gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
        int i, rc;
        char debug_name[16];
        static unsigned long sca_offset;
@@ -2670,7 +2671,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
        kvm->arch.sie_page2 =
-            (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+            (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
        if (!kvm->arch.sie_page2)
                goto out_err;
 
@@ -2900,7 +2901,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
        if (kvm->arch.use_esca)
                return 0;
 
-       new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
+       new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        if (!new_sca)
                return -ENOMEM;
 
@@ -3133,7 +3134,7 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
 
 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
+       vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
        if (!vcpu->arch.sie_block->cbrlo)
                return -ENOMEM;
        return 0;
@@ -3243,7 +3244,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        int rc;
 
        BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
-       sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
+       sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
        if (!sie_page)
                return -ENOMEM;
 
@@ -4109,6 +4110,7 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
                current->thread.gmap_pfault = 0;
                if (kvm_arch_setup_async_pf(vcpu))
                        return 0;
+               vcpu->stat.pfault_sync++;
                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
        }
        return vcpu_post_run_fault_in_sie(vcpu);
index cd74989..9928f78 100644 (file)
@@ -879,7 +879,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
        switch (fc) {
        case 1: /* same handling for 1 and 2 */
        case 2:
-               mem = get_zeroed_page(GFP_KERNEL);
+               mem = get_zeroed_page(GFP_KERNEL_ACCOUNT);
                if (!mem)
                        goto out_no_data;
                if (stsi((void *) mem, fc, sel1, sel2))
@@ -888,7 +888,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
        case 3:
                if (sel1 != 2 || sel2 != 2)
                        goto out_no_data;
-               mem = get_zeroed_page(GFP_KERNEL);
+               mem = get_zeroed_page(GFP_KERNEL_ACCOUNT);
                if (!mem)
                        goto out_no_data;
                handle_stsi_3_2_2(vcpu, (void *) mem);
index f5847f9..813b6e9 100644 (file)
@@ -60,7 +60,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
        if (kvm_s390_pv_cpu_get_handle(vcpu))
                return -EINVAL;
 
-       vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL,
+       vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
                                                   get_order(uv_info.guest_cpu_stor_len));
        if (!vcpu->arch.pv.stor_base)
                return -ENOMEM;
@@ -72,7 +72,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
        uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
 
        /* Alloc Secure Instruction Data Area Designation */
-       vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+       vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        if (!vcpu->arch.sie_block->sidad) {
                free_pages(vcpu->arch.pv.stor_base,
                           get_order(uv_info.guest_cpu_stor_len));
@@ -120,7 +120,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
        struct kvm_memory_slot *memslot;
 
        kvm->arch.pv.stor_var = NULL;
-       kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base));
+       kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
        if (!kvm->arch.pv.stor_base)
                return -ENOMEM;
 
index 4f3cbf6..c5d0a58 100644 (file)
@@ -1234,7 +1234,7 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
 
        mutex_lock(&kvm->arch.vsie.mutex);
        if (kvm->arch.vsie.page_count < nr_vcpus) {
-               page = alloc_page(GFP_KERNEL | __GFP_ZERO | GFP_DMA);
+               page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
                if (!page) {
                        mutex_unlock(&kvm->arch.vsie.mutex);
                        return ERR_PTR(-ENOMEM);
@@ -1336,7 +1336,7 @@ out_put:
 void kvm_s390_vsie_init(struct kvm *kvm)
 {
        mutex_init(&kvm->arch.vsie.mutex);
-       INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL);
+       INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT);
 }
 
 /* Destroy the vsie data structures. To be called when a vm is destroyed. */
index 68d61f2..f289afe 100644 (file)
 #include <asm/div64.h>
 #include <asm/idle.h>
 
-static DEFINE_STATIC_KEY_FALSE(udelay_ready);
-
-void __init udelay_enable(void)
-{
-       static_branch_enable(&udelay_ready);
-}
-
 void __delay(unsigned long loops)
 {
         /*
@@ -39,105 +32,25 @@ void __delay(unsigned long loops)
 }
 EXPORT_SYMBOL(__delay);
 
-static void __udelay_disabled(unsigned long long usecs)
+static void delay_loop(unsigned long delta)
 {
-       unsigned long cr0, cr0_new, psw_mask;
-       struct s390_idle_data idle;
-       u64 end;
+       unsigned long end;
 
-       end = get_tod_clock() + (usecs << 12);
-       __ctl_store(cr0, 0, 0);
-       cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK;
-       cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */
-       __ctl_load(cr0_new, 0, 0);
-       psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
-       set_clock_comparator(end);
-       set_cpu_flag(CIF_IGNORE_IRQ);
-       psw_idle(&idle, psw_mask);
-       trace_hardirqs_off();
-       clear_cpu_flag(CIF_IGNORE_IRQ);
-       set_clock_comparator(S390_lowcore.clock_comparator);
-       __ctl_load(cr0, 0, 0);
+       end = get_tod_clock_monotonic() + delta;
+       while (!tod_after(get_tod_clock_monotonic(), end))
+               cpu_relax();
 }
 
-static void __udelay_enabled(unsigned long long usecs)
+void __udelay(unsigned long usecs)
 {
-       u64 clock_saved, end;
-
-       end = get_tod_clock_fast() + (usecs << 12);
-       do {
-               clock_saved = 0;
-               if (tod_after(S390_lowcore.clock_comparator, end)) {
-                       clock_saved = local_tick_disable();
-                       set_clock_comparator(end);
-               }
-               enabled_wait();
-               if (clock_saved)
-                       local_tick_enable(clock_saved);
-       } while (get_tod_clock_fast() < end);
-}
-
-/*
- * Waits for 'usecs' microseconds using the TOD clock comparator.
- */
-void __udelay(unsigned long long usecs)
-{
-       unsigned long flags;
-
-       if (!static_branch_likely(&udelay_ready)) {
-               udelay_simple(usecs);
-               return;
-       }
-
-       preempt_disable();
-       local_irq_save(flags);
-       if (in_irq()) {
-               __udelay_disabled(usecs);
-               goto out;
-       }
-       if (in_softirq()) {
-               if (raw_irqs_disabled_flags(flags))
-                       __udelay_disabled(usecs);
-               else
-                       __udelay_enabled(usecs);
-               goto out;
-       }
-       if (raw_irqs_disabled_flags(flags)) {
-               local_bh_disable();
-               __udelay_disabled(usecs);
-               _local_bh_enable();
-               goto out;
-       }
-       __udelay_enabled(usecs);
-out:
-       local_irq_restore(flags);
-       preempt_enable();
+       delay_loop(usecs << 12);
 }
 EXPORT_SYMBOL(__udelay);
 
-/*
- * Simple udelay variant. To be used on startup and reboot
- * when the interrupt handler isn't working.
- */
-void udelay_simple(unsigned long long usecs)
-{
-       u64 end;
-
-       end = get_tod_clock_fast() + (usecs << 12);
-       while (get_tod_clock_fast() < end)
-               cpu_relax();
-}
-
-void __ndelay(unsigned long long nsecs)
+void __ndelay(unsigned long nsecs)
 {
-       u64 end;
-
        nsecs <<= 9;
        do_div(nsecs, 125);
-       end = get_tod_clock_fast() + nsecs;
-       if (nsecs & ~0xfffUL)
-               __udelay(nsecs >> 12);
-       while (get_tod_clock_fast() < end)
-               barrier();
+       delay_loop(nsecs);
 }
 EXPORT_SYMBOL(__ndelay);
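
The rewritten __udelay()/__ndelay() are plain busy-waits against the
monotonic TOD clock: bit 51 of the TOD value ticks once per microsecond, so
usecs << 12 converts microseconds to TOD units and nsecs * 512 / 125
(i.e. * 4096/1000) converts nanoseconds. A hedged userspace analogue of
delay_loop() using CLOCK_MONOTONIC in place of the TOD clock:

#include <stdint.h>
#include <time.h>

static uint64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* Spin until 'delta' nanoseconds have elapsed, like delay_loop() does
 * with TOD-clock units. */
static void delay_loop_ns(uint64_t delta)
{
        uint64_t end = now_ns() + delta;

        while (now_ns() < end)
                ;       /* the kernel version calls cpu_relax() here */
}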
index 7c98899..dcd8946 100644 (file)
@@ -9,12 +9,12 @@
 #include <linux/kallsyms.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <linux/timer.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/kprobes.h>
 #include <linux/wait.h>
 #include <asm/irq.h>
-#include <asm/delay.h>
 
 #define BT_BUF_SIZE (PAGE_SIZE * 4)
 
@@ -205,12 +205,15 @@ static noinline int unwindme_func3(struct unwindme *u)
 /* This function must appear in the backtrace. */
 static noinline int unwindme_func2(struct unwindme *u)
 {
+       unsigned long flags;
        int rc;
 
        if (u->flags & UWM_SWITCH_STACK) {
-               preempt_disable();
+               local_irq_save(flags);
+               local_mcck_disable();
                rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u);
-               preempt_enable();
+               local_mcck_enable();
+               local_irq_restore(flags);
                return rc;
        } else {
                return unwindme_func3(u);
@@ -223,31 +226,27 @@ static noinline int unwindme_func1(void *u)
        return unwindme_func2((struct unwindme *)u);
 }
 
-static void unwindme_irq_handler(struct ext_code ext_code,
-                                      unsigned int param32,
-                                      unsigned long param64)
+static void unwindme_timer_fn(struct timer_list *unused)
 {
        struct unwindme *u = READ_ONCE(unwindme);
 
-       if (u && u->task == current) {
+       if (u) {
                unwindme = NULL;
                u->task = NULL;
                u->ret = unwindme_func1(u);
+               complete(&u->task_ready);
        }
 }
 
+static struct timer_list unwind_timer;
+
 static int test_unwind_irq(struct unwindme *u)
 {
-       preempt_disable();
-       if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) {
-               pr_info("Couldn't register external interrupt handler");
-               return -1;
-       }
-       u->task = current;
        unwindme = u;
-       udelay(1);
-       unregister_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler);
-       preempt_enable();
+       init_completion(&u->task_ready);
+       timer_setup(&unwind_timer, unwindme_timer_fn, 0);
+       mod_timer(&unwind_timer, jiffies + 1);
+       wait_for_completion(&u->task_ready);
        return u->ret;
 }
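
The test now reaches interrupt context through an ordinary kernel timer and
synchronizes with a completion, instead of hijacking the clock-comparator
external interrupt. A hedged sketch of that timer-plus-completion pattern
(names are made up):

#include <linux/timer.h>
#include <linux/completion.h>
#include <linux/jiffies.h>

static struct timer_list demo_timer;
static DECLARE_COMPLETION(demo_done);

static void demo_timer_fn(struct timer_list *unused)
{
        /* Runs in timer (softirq) context. */
        complete(&demo_done);
}

static int run_in_timer_context(void)
{
        timer_setup(&demo_timer, demo_timer_fn, 0);
        mod_timer(&demo_timer, jiffies + 1);    /* fire on the next tick */
        wait_for_completion(&demo_done);        /* sleep until it ran */
        del_timer_sync(&demo_timer);
        return 0;
}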
 
index 64795d0..9bb2c75 100644 (file)
@@ -2,7 +2,7 @@
 /*
  *  KVM guest address space mapping code
  *
- *    Copyright IBM Corp. 2007, 2016, 2018
+ *    Copyright IBM Corp. 2007, 2020
  *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  *              David Hildenbrand <david@redhat.com>
  *              Janosch Frank <frankja@linux.vnet.ibm.com>
@@ -56,19 +56,19 @@ static struct gmap *gmap_alloc(unsigned long limit)
                atype = _ASCE_TYPE_REGION1;
                etype = _REGION1_ENTRY_EMPTY;
        }
-       gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+       gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
        if (!gmap)
                goto out;
        INIT_LIST_HEAD(&gmap->crst_list);
        INIT_LIST_HEAD(&gmap->children);
        INIT_LIST_HEAD(&gmap->pt_list);
-       INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
-       INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
-       INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC);
+       INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
+       INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
+       INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
        spin_lock_init(&gmap->guest_table_lock);
        spin_lock_init(&gmap->shadow_lock);
        refcount_set(&gmap->ref_count, 1);
-       page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                goto out_free;
        page->index = 0;
@@ -309,7 +309,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
        unsigned long *new;
 
        /* since we dont free the gmap table until gmap_free we can unlock */
-       page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        new = (unsigned long *) page_to_phys(page);
@@ -594,7 +594,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
        if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
                return -EFAULT;
        /* Link gmap segment table entry location to page table. */
-       rc = radix_tree_preload(GFP_KERNEL);
+       rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
        if (rc)
                return rc;
        ptl = pmd_lock(mm, pmd);
@@ -1218,11 +1218,11 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
                vmaddr = __gmap_translate(parent, paddr);
                if (IS_ERR_VALUE(vmaddr))
                        return vmaddr;
-               rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
+               rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
                if (!rmap)
                        return -ENOMEM;
                rmap->raddr = raddr;
-               rc = radix_tree_preload(GFP_KERNEL);
+               rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
                if (rc) {
                        kfree(rmap);
                        return rc;
@@ -1741,7 +1741,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 
        BUG_ON(!gmap_is_shadow(sg));
        /* Allocate a shadow region second table */
-       page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        page->index = r2t & _REGION_ENTRY_ORIGIN;
@@ -1825,7 +1825,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 
        BUG_ON(!gmap_is_shadow(sg));
        /* Allocate a shadow region second table */
-       page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        page->index = r3t & _REGION_ENTRY_ORIGIN;
@@ -1909,7 +1909,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 
        BUG_ON(!gmap_is_shadow(sg) || (sgt & _REGION3_ENTRY_LARGE));
        /* Allocate a shadow segment table */
-       page = alloc_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+       page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
        page->index = sgt & _REGION_ENTRY_ORIGIN;
@@ -2116,7 +2116,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
        parent = sg->parent;
        prot = (pte_val(pte) & _PAGE_PROTECT) ? PROT_READ : PROT_WRITE;
 
-       rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
+       rmap = kzalloc(sizeof(*rmap), GFP_KERNEL_ACCOUNT);
        if (!rmap)
                return -ENOMEM;
        rmap->raddr = (saddr & PAGE_MASK) | _SHADOW_RMAP_PGTABLE;
@@ -2128,7 +2128,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
                        rc = vmaddr;
                        break;
                }
-               rc = radix_tree_preload(GFP_KERNEL);
+               rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
                if (rc)
                        break;
                rc = -EAGAIN;
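
The gmap trees that are populated under page-table locks keep GFP_ATOMIC and
only OR in __GFP_ACCOUNT; node memory is preloaded with GFP_KERNEL_ACCOUNT
while sleeping is still allowed. A hedged sketch of that preload pattern
(demo_* names are made up):

#include <linux/radix-tree.h>
#include <linux/spinlock.h>

static RADIX_TREE(demo_tree, GFP_ATOMIC | __GFP_ACCOUNT);
static DEFINE_SPINLOCK(demo_lock);

static int demo_insert(unsigned long index, void *item)
{
        int rc;

        /* Preload tree nodes while we may still sleep (and be accounted). */
        rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
        if (rc)
                return rc;

        spin_lock(&demo_lock);
        rc = radix_tree_insert(&demo_tree, index, item);
        spin_unlock(&demo_lock);

        radix_tree_preload_end();       /* re-enable preemption */
        return rc;
}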
index bffbe69..921d76f 100644 (file)
@@ -6,10 +6,10 @@
  */
 
 #include <linux/of.h>
+#include <linux/of_clk.h>
 #include <linux/of_fdt.h>
 #include <linux/clocksource.h>
 #include <linux/irqchip.h>
-#include <linux/clk-provider.h>
 #include <asm/machvec.h>
 #include <asm/rtc.h>
 
index 7837384..9df40ac 100644 (file)
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
+441    common  epoll_pwait2                    sys_epoll_pwait2
index 7816026..40d8c7c 100644 (file)
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
+441    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
index 43333e3..34d302d 100644 (file)
@@ -15,6 +15,7 @@ config UML
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DEBUG_BUGVERBOSE
        select NO_DMA
+       select ARCH_HAS_SET_MEMORY
        select GENERIC_IRQ_SHOW
        select GENERIC_CPU_DEVICES
        select HAVE_GCC_PLUGINS
@@ -191,3 +192,8 @@ config UML_TIME_TRAVEL_SUPPORT
 endmenu
 
 source "arch/um/drivers/Kconfig"
+
+config ARCH_SUSPEND_POSSIBLE
+       def_bool y
+
+source "kernel/power/Kconfig"
index 4d80526..d8845d4 100644 (file)
@@ -26,10 +26,10 @@ int generic_read(int fd, char *c_out, void *unused)
        n = read(fd, c_out, sizeof(*c_out));
        if (n > 0)
                return n;
-       else if (errno == EAGAIN)
-               return 0;
        else if (n == 0)
                return -EIO;
+       else if (errno == EAGAIN)
+               return 0;
        return -errno;
 }
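
The reorder matters because errno is only meaningful when read() actually
failed: a zero return (EOF) leaves errno untouched, so testing errno ==
EAGAIN first could misreport a closed descriptor as "no data yet". The fixed
ordering, as a standalone sketch:

#include <errno.h>
#include <unistd.h>

static int demo_read_one(int fd, char *c_out)
{
        ssize_t n = read(fd, c_out, 1);

        if (n > 0)
                return n;
        if (n == 0)
                return -EIO;    /* EOF: the other end went away */
        if (errno == EAGAIN)    /* errno valid only now, when n < 0 */
                return 0;
        return -errno;
}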
 
index 14ad9f4..1c70a31 100644 (file)
@@ -262,19 +262,25 @@ static irqreturn_t line_write_interrupt(int irq, void *data)
 int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
 {
        const struct line_driver *driver = line->driver;
-       int err = 0;
+       int err;
 
-       if (input)
+       if (input) {
                err = um_request_irq(driver->read_irq, fd, IRQ_READ,
                                     line_interrupt, IRQF_SHARED,
                                     driver->read_irq_name, data);
-       if (err)
-               return err;
-       if (output)
+               if (err < 0)
+                       return err;
+       }
+
+       if (output) {
                err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
                                     line_write_interrupt, IRQF_SHARED,
                                     driver->write_irq_name, data);
-       return err;
+               if (err < 0)
+                       return err;
+       }
+
+       return 0;
 }
 
 static int line_activate(struct tty_port *port, struct tty_struct *tty)
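
The err < 0 checks repeated across these UML drivers exist because
um_request_irq() now returns the IRQ number on success (and, as in the
virtio-uml hunk further down, can allocate one dynamically via UM_IRQ_ALLOC),
so any non-negative value means success. A hedged sketch of the calling
convention (demo_* names are made up):

static irqreturn_t demo_interrupt(int irq, void *data)
{
        return IRQ_HANDLED;
}

static int demo_setup_irq(int fd, void *dev, int *irq_out)
{
        int irq;

        irq = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ, demo_interrupt,
                             IRQF_SHARED, "demo", dev);
        if (irq < 0)            /* negative errno on failure */
                return irq;

        *irq_out = irq;         /* remember the IRQ actually in use */
        return 0;
}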
@@ -608,7 +614,6 @@ static void free_winch(struct winch *winch)
        winch->fd = -1;
        if (fd != -1)
                os_close_file(fd);
-       list_del(&winch->list);
        __free_winch(&winch->work);
 }
 
@@ -709,6 +714,8 @@ static void unregister_winch(struct tty_struct *tty)
                winch = list_entry(ele, struct winch, list);
                wtty = tty_port_tty_get(winch->port);
                if (wtty == tty) {
+                       list_del(&winch->list);
+                       spin_unlock(&winch_handler_lock);
                        free_winch(winch);
                        break;
                }
@@ -719,14 +726,17 @@ static void unregister_winch(struct tty_struct *tty)
 
 static void winch_cleanup(void)
 {
-       struct list_head *ele, *next;
        struct winch *winch;
 
        spin_lock(&winch_handler_lock);
+       while ((winch = list_first_entry_or_null(&winch_handlers,
+                                                struct winch, list))) {
+               list_del(&winch->list);
+               spin_unlock(&winch_handler_lock);
 
-       list_for_each_safe(ele, next, &winch_handlers) {
-               winch = list_entry(ele, struct winch, list);
                free_winch(winch);
+
+               spin_lock(&winch_handler_lock);
        }
 
        spin_unlock(&winch_handler_lock);
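
Both winch paths now unlink the entry while holding winch_handler_lock and
drop the lock before free_winch(), because freeing closes a host file
descriptor and flushes work, which must not happen under a spinlock. The
general shape of that pattern, sketched with made-up names:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct demo_item {                      /* hypothetical */
        struct list_head node;
};

static LIST_HEAD(demo_list);
static DEFINE_SPINLOCK(demo_lock);

static void destroy_item(struct demo_item *item)
{
        kfree(item);                    /* stands in for work that may sleep */
}

static void demo_cleanup(void)
{
        struct demo_item *item;

        spin_lock(&demo_lock);
        while ((item = list_first_entry_or_null(&demo_list,
                                                struct demo_item, node))) {
                list_del(&item->node);
                spin_unlock(&demo_lock);

                destroy_item(item);     /* lock dropped: free may sleep */

                spin_lock(&demo_lock);
        }
        spin_unlock(&demo_lock);
}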
index a2e680f..6d00af2 100644 (file)
@@ -738,7 +738,7 @@ static int __init mconsole_init(void)
 
        err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt,
                             IRQF_SHARED, "mconsole", (void *)sock);
-       if (err) {
+       if (err < 0) {
                printk(KERN_ERR "Failed to get IRQ for management console\n");
                goto out;
        }
index 1802cf4..2fc0b03 100644 (file)
@@ -160,7 +160,7 @@ static int uml_net_open(struct net_device *dev)
 
        err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt,
                             IRQF_SHARED, dev->name, dev);
-       if (err != 0) {
+       if (err < 0) {
                printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err);
                err = -ENETUNREACH;
                goto out_close;
index a47ca53..efa8b73 100644 (file)
@@ -100,7 +100,7 @@ static int port_accept(struct port_list *port)
                  .port         = port });
 
        if (um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt,
-                         IRQF_SHARED, "telnetd", conn)) {
+                         IRQF_SHARED, "telnetd", conn) < 0) {
                printk(KERN_ERR "port_accept : failed to get IRQ for "
                       "telnetd\n");
                goto out_free;
@@ -182,7 +182,7 @@ void *port_data(int port_num)
        }
 
        if (um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt,
-                         IRQF_SHARED, "port", port)) {
+                         IRQF_SHARED, "port", port) < 0) {
                printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num);
                goto out_close;
        }
index ce115fc..433a3f8 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/fs.h>
 #include <linux/interrupt.h>
 #include <linux/miscdevice.h>
+#include <linux/hw_random.h>
 #include <linux/delay.h>
 #include <linux/uaccess.h>
 #include <init.h>
@@ -18,9 +19,8 @@
 #include <os.h>
 
 /*
- * core module and version information
+ * core module information
  */
-#define RNG_VERSION "1.0.0"
 #define RNG_MODULE_NAME "hw_random"
 
 /* Changed at init time, in the non-modular case, and at module load time,
  * in the module case. Protected against a module being loaded twice at
  * the same time.
  */
 static int random_fd = -1;
-static DECLARE_WAIT_QUEUE_HEAD(host_read_wait);
+static struct hwrng hwrng = { 0, };
+static DECLARE_COMPLETION(have_data);
 
-static int rng_dev_open (struct inode *inode, struct file *filp)
+static int rng_dev_read(struct hwrng *rng, void *buf, size_t max, bool block)
 {
-       /* enforce read-only access to this chrdev */
-       if ((filp->f_mode & FMODE_READ) == 0)
-               return -EINVAL;
-       if ((filp->f_mode & FMODE_WRITE) != 0)
-               return -EINVAL;
+       int ret;
 
-       return 0;
-}
-
-static atomic_t host_sleep_count = ATOMIC_INIT(0);
-
-static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
-                            loff_t *offp)
-{
-       u32 data;
-       int n, ret = 0, have_data;
-
-       while (size) {
-               n = os_read_file(random_fd, &data, sizeof(data));
-               if (n > 0) {
-                       have_data = n;
-                       while (have_data && size) {
-                               if (put_user((u8) data, buf++)) {
-                                       ret = ret ? : -EFAULT;
-                                       break;
-                               }
-                               size--;
-                               ret++;
-                               have_data--;
-                               data >>= 8;
-                       }
-               }
-               else if (n == -EAGAIN) {
-                       DECLARE_WAITQUEUE(wait, current);
-
-                       if (filp->f_flags & O_NONBLOCK)
-                               return ret ? : -EAGAIN;
-
-                       atomic_inc(&host_sleep_count);
+       for (;;) {
+               ret = os_read_file(random_fd, buf, max);
+               if (block && ret == -EAGAIN) {
                        add_sigio_fd(random_fd);
 
-                       add_wait_queue(&host_read_wait, &wait);
-                       set_current_state(TASK_INTERRUPTIBLE);
+                       ret = wait_for_completion_killable(&have_data);
 
-                       schedule();
-                       remove_wait_queue(&host_read_wait, &wait);
+                       ignore_sigio_fd(random_fd);
+                       deactivate_fd(random_fd, RANDOM_IRQ);
 
-                       if (atomic_dec_and_test(&host_sleep_count)) {
-                               ignore_sigio_fd(random_fd);
-                               deactivate_fd(random_fd, RANDOM_IRQ);
-                       }
+                       if (ret < 0)
+                               break;
+               } else {
+                       break;
                }
-               else
-                       return n;
-
-               if (signal_pending (current))
-                       return ret ? : -ERESTARTSYS;
        }
-       return ret;
-}
-
-static const struct file_operations rng_chrdev_ops = {
-       .owner          = THIS_MODULE,
-       .open           = rng_dev_open,
-       .read           = rng_dev_read,
-       .llseek         = noop_llseek,
-};
 
-/* rng_init shouldn't be called more than once at boot time */
-static struct miscdevice rng_miscdev = {
-       HWRNG_MINOR,
-       RNG_MODULE_NAME,
-       &rng_chrdev_ops,
-};
+       return ret != -EAGAIN ? ret : 0;
+}
 
 static irqreturn_t random_interrupt(int irq, void *data)
 {
-       wake_up(&host_read_wait);
+       complete(&have_data);
 
        return IRQ_HANDLED;
 }
@@ -126,18 +74,19 @@ static int __init rng_init (void)
                goto out;
 
        random_fd = err;
-
        err = um_request_irq(RANDOM_IRQ, random_fd, IRQ_READ, random_interrupt,
                             0, "random", NULL);
-       if (err)
+       if (err < 0)
                goto err_out_cleanup_hw;
 
-       sigio_broken(random_fd, 1);
+       sigio_broken(random_fd);
+       hwrng.name = RNG_MODULE_NAME;
+       hwrng.read = rng_dev_read;
+       hwrng.quality = 1024;
 
-       err = misc_register (&rng_miscdev);
+       err = hwrng_register(&hwrng);
        if (err) {
-               printk (KERN_ERR RNG_MODULE_NAME ": misc device register "
-                       "failed\n");
+               pr_err(RNG_MODULE_NAME " registering failed (%d)\n", err);
                goto err_out_cleanup_hw;
        }
 out:
@@ -161,8 +110,8 @@ static void cleanup(void)
 
 static void __exit rng_cleanup(void)
 {
+       hwrng_unregister(&hwrng);
        os_close_file(random_fd);
-       misc_deregister (&rng_miscdev);
 }
 
 module_init (rng_init);
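
After the conversion the driver is just a hw_random backend: no chardev, no
wait-queue bookkeeping, only a read callback and an hwrng registration. A
minimal hedged sketch of such a backend (the entropy source is a stub):

#include <linux/module.h>
#include <linux/string.h>
#include <linux/hw_random.h>

static int demo_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
        /* Stub: fill buf from the real entropy source here. Return the
         * number of bytes produced or a negative errno. */
        memset(buf, 0, max);
        return max;
}

static struct hwrng demo_rng = {
        .name = "demo-rng",
        .read = demo_rng_read,
        .quality = 1024,        /* claimed entropy per 1024 bits of input */
};

static int __init demo_rng_init(void)
{
        return hwrng_register(&demo_rng);
}

static void __exit demo_rng_exit(void)
{
        hwrng_unregister(&demo_rng);
}

module_init(demo_rng_init);
module_exit(demo_rng_exit);
MODULE_LICENSE("GPL");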
index eae8c83..13b1fe6 100644 (file)
 /* Max request size is determined by sector mask - 32K */
 #define UBD_MAX_REQUEST (8 * sizeof(long))
 
+struct io_desc {
+       char *buffer;
+       unsigned long length;
+       unsigned long sector_mask;
+       unsigned long long cow_offset;
+       unsigned long bitmap_words[2];
+};
+
 struct io_thread_req {
        struct request *req;
        int fds[2];
        unsigned long offsets[2];
        unsigned long long offset;
-       unsigned long length;
-       char *buffer;
        int sectorsize;
-       unsigned long sector_mask;
-       unsigned long long cow_offset;
-       unsigned long bitmap_words[2];
        int error;
+
+       int desc_cnt;
+       /* io_desc has to be the last element of the struct */
+       struct io_desc io_desc[];
 };
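
io_desc[] is a C99 flexible array member, so one allocation carries the
request header plus a variable number of per-segment descriptors;
ubd_alloc_req() later in this patch sizes it as sizeof(*io_req) +
desc_cnt * sizeof(struct io_desc). The kernel's struct_size() helper
expresses the same size with overflow checking; a hedged sketch:

#include <linux/overflow.h>
#include <linux/slab.h>

/* Overflow-checked equivalent of the sizing in ubd_alloc_req() (sketch). */
static struct io_thread_req *demo_alloc_req(int desc_cnt)
{
        struct io_thread_req *io_req;

        io_req = kmalloc(struct_size(io_req, io_desc, desc_cnt), GFP_ATOMIC);
        if (io_req)
                io_req->desc_cnt = desc_cnt;
        return io_req;
}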
 
 
@@ -148,6 +155,7 @@ struct ubd {
        /* name (and fd, below) of the file opened for writing, either the
         * backing or the cow file. */
        char *file;
+       char *serial;
        int count;
        int fd;
        __u64 size;
@@ -173,6 +181,7 @@ struct ubd {
 
 #define DEFAULT_UBD { \
        .file =                 NULL, \
+       .serial =               NULL, \
        .count =                0, \
        .fd =                   -1, \
        .size =                 -1, \
@@ -265,7 +274,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
 {
        struct ubd *ubd_dev;
        struct openflags flags = global_openflags;
-       char *backing_file;
+       char *file, *backing_file, *serial;
        int n, err = 0, i;
 
        if(index_out) *index_out = -1;
@@ -361,24 +370,27 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
        goto out;
 
 break_loop:
-       backing_file = strchr(str, ',');
+       file = strsep(&str, ",:");
+       if (*file == '\0')
+               file = NULL;
 
-       if (backing_file == NULL)
-               backing_file = strchr(str, ':');
+       backing_file = strsep(&str, ",:");
+       if (*backing_file == '\0')
+               backing_file = NULL;
 
-       if(backing_file != NULL){
-               if(ubd_dev->no_cow){
-                       *error_out = "Can't specify both 'd' and a cow file";
-                       goto out;
-               }
-               else {
-                       *backing_file = '\0';
-                       backing_file++;
-               }
+       serial = strsep(&str, ",:");
+       if (*serial == '\0')
+               serial = NULL;
+
+       if (backing_file && ubd_dev->no_cow) {
+               *error_out = "Can't specify both 'd' and a cow file";
+               goto out;
        }
+
        err = 0;
-       ubd_dev->file = str;
+       ubd_dev->file = file;
        ubd_dev->cow.file = backing_file;
+       ubd_dev->serial = serial;
        ubd_dev->boot_openflags = flags;
 out:
        mutex_unlock(&ubd_lock);
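
strsep() consumes the option string in place and, unlike the old strchr()
juggling, yields an empty token for consecutive separators, which is what
lets "ubd0=File,,Serial" skip the backing file. A runnable userspace sketch
of the same three-field parse:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char buf[] = "File,,Serial";    /* as in ubd0=File,,Serial */
        char *str = buf;
        char *file = strsep(&str, ",:");
        char *backing_file = strsep(&str, ",:");        /* empty token */
        char *serial = strsep(&str, ",:");

        /* Empty tokens mean "not given", as in ubd_setup_common(). */
        printf("file=%s backing=%s serial=%s\n",
               file && *file ? file : "(none)",
               backing_file && *backing_file ? backing_file : "(none)",
               serial && *serial ? serial : "(none)");
        return 0;
}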
@@ -399,7 +411,7 @@ static int ubd_setup(char *str)
 
 __setup("ubd", ubd_setup);
 __uml_help(ubd_setup,
-"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
+"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
 "    This is used to associate a device with a file in the underlying\n"
 "    filesystem. When specifying two filenames, the first one is the\n"
 "    COW name and the second is the backing file name. As separator you can\n"
@@ -422,6 +434,12 @@ __uml_help(ubd_setup,
 "    UMLs and file locking will be turned off - this is appropriate for a\n"
 "    cluster filesystem and inappropriate at almost all other times.\n\n"
 "    't' will disable trim/discard support on the device (enabled by default).\n\n"
+"    An optional device serial number can be exposed using the serial parameter\n"
+"    on the cmdline which is exposed as a sysfs entry. This is particularly\n"
+"    useful when a unique number should be given to the device. Note when\n"
+"    specifying a label, the filename2 must be also presented. It can be\n"
+"    an empty string, in which case the backing file is not used:\n"
+"       ubd0=File,,Serial\n"
 );
 
 static int udb_setup(char *str)
@@ -525,12 +543,7 @@ static void ubd_handler(void)
                                blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
                                blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
                        }
-                       if ((io_req->error) || (io_req->buffer == NULL))
-                               blk_mq_end_request(io_req->req, io_req->error);
-                       else {
-                               if (!blk_update_request(io_req->req, io_req->error, io_req->length))
-                                       __blk_mq_end_request(io_req->req, io_req->error);
-                       }
+                       blk_mq_end_request(io_req->req, io_req->error);
                        kfree(io_req);
                }
        }
@@ -866,6 +879,41 @@ static void ubd_device_release(struct device *dev)
        *ubd_dev = ((struct ubd) DEFAULT_UBD);
 }
 
+static ssize_t serial_show(struct device *dev,
+                          struct device_attribute *attr, char *buf)
+{
+       struct gendisk *disk = dev_to_disk(dev);
+       struct ubd *ubd_dev = disk->private_data;
+
+       if (!ubd_dev)
+               return 0;
+
+       return sprintf(buf, "%s", ubd_dev->serial);
+}
+
+static DEVICE_ATTR_RO(serial);
+
+static struct attribute *ubd_attrs[] = {
+       &dev_attr_serial.attr,
+       NULL,
+};
+
+static umode_t ubd_attrs_are_visible(struct kobject *kobj,
+                                    struct attribute *a, int n)
+{
+       return a->mode;
+}
+
+static const struct attribute_group ubd_attr_group = {
+       .attrs = ubd_attrs,
+       .is_visible = ubd_attrs_are_visible,
+};
+
+static const struct attribute_group *ubd_attr_groups[] = {
+       &ubd_attr_group,
+       NULL,
+};
+
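
DEVICE_ATTR_RO(serial) generates a struct device_attribute named
dev_attr_serial with mode 0444 and serial_show as the read callback, and
passing ubd_attr_groups to device_add_disk() makes the file appear together
with the disk (e.g. /sys/block/ubd0/serial). Roughly, the macro expands to
(a sketch, not the exact kernel text):

static struct device_attribute dev_attr_serial = {
        .attr = { .name = "serial", .mode = 0444 },
        .show = serial_show,
        /* no .store: the attribute is read-only */
};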
 static int ubd_disk_register(int major, u64 size, int unit,
                             struct gendisk **disk_out)
 {
@@ -897,7 +945,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
 
        disk->private_data = &ubd_devs[unit];
        disk->queue = ubd_devs[unit].queue;
-       device_add_disk(parent, disk, NULL);
+       device_add_disk(parent, disk, ubd_attr_groups);
 
        *disk_out = disk;
        return 0;
@@ -946,6 +994,7 @@ static int ubd_add(int n, char **error_out)
        blk_queue_write_cache(ubd_dev->queue, true, false);
 
        blk_queue_max_segments(ubd_dev->queue, MAX_SG);
+       blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
        err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
        if(err){
                *error_out = "Failed to register device";
@@ -1192,7 +1241,7 @@ static int __init ubd_driver_init(void){
                /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
                 * enough. So use anyway the io thread. */
        }
-       stack = alloc_stack(0, 0);
+       stack = alloc_stack(0);
        io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
                                 &thread_fd);
        if(io_pid < 0){
@@ -1204,7 +1253,7 @@ static int __init ubd_driver_init(void){
        }
        err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
                             0, "ubd", ubd_devs);
-       if(err != 0)
+       if(err < 0)
                printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
        return 0;
 }
@@ -1289,37 +1338,74 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
        *cow_offset += bitmap_offset;
 }
 
-static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
+static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
+                      unsigned long offset, unsigned long *bitmap,
                       __u64 bitmap_offset, __u64 bitmap_len)
 {
-       __u64 sector = req->offset >> SECTOR_SHIFT;
+       __u64 sector = offset >> SECTOR_SHIFT;
        int i;
 
-       if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT)
+       if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
                panic("Operation too long");
 
        if (req_op(req->req) == REQ_OP_READ) {
-               for (i = 0; i < req->length >> SECTOR_SHIFT; i++) {
+               for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
                        if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
                                ubd_set_bit(i, (unsigned char *)
-                                           &req->sector_mask);
+                                           &segment->sector_mask);
                }
+       } else {
+               cowify_bitmap(offset, segment->length, &segment->sector_mask,
+                             &segment->cow_offset, bitmap, bitmap_offset,
+                             segment->bitmap_words, bitmap_len);
        }
-       else cowify_bitmap(req->offset, req->length, &req->sector_mask,
-                          &req->cow_offset, bitmap, bitmap_offset,
-                          req->bitmap_words, bitmap_len);
 }
 
-static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
-               u64 off, struct bio_vec *bvec)
+static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
+                       struct request *req)
+{
+       struct bio_vec bvec;
+       struct req_iterator iter;
+       int i = 0;
+       unsigned long byte_offset = io_req->offset;
+       int op = req_op(req);
+
+       if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
+               io_req->io_desc[0].buffer = NULL;
+               io_req->io_desc[0].length = blk_rq_bytes(req);
+       } else {
+               rq_for_each_segment(bvec, req, iter) {
+                       BUG_ON(i >= io_req->desc_cnt);
+
+                       io_req->io_desc[i].buffer =
+                               page_address(bvec.bv_page) + bvec.bv_offset;
+                       io_req->io_desc[i].length = bvec.bv_len;
+                       i++;
+               }
+       }
+
+       if (dev->cow.file) {
+               for (i = 0; i < io_req->desc_cnt; i++) {
+                       cowify_req(io_req, &io_req->io_desc[i], byte_offset,
+                                  dev->cow.bitmap, dev->cow.bitmap_offset,
+                                  dev->cow.bitmap_len);
+                       byte_offset += io_req->io_desc[i].length;
+               }
+
+       }
+}
+
+static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
+                                          int desc_cnt)
 {
-       struct ubd *dev = hctx->queue->queuedata;
        struct io_thread_req *io_req;
-       int ret;
+       int i;
 
-       io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC);
+       io_req = kmalloc(sizeof(*io_req) +
+                        (desc_cnt * sizeof(struct io_desc)),
+                        GFP_ATOMIC);
        if (!io_req)
-               return -ENOMEM;
+               return NULL;
 
        io_req->req = req;
        if (dev->cow.file)
@@ -1327,26 +1413,41 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
        else
                io_req->fds[0] = dev->fd;
        io_req->error = 0;
-
-       if (bvec != NULL) {
-               io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
-               io_req->length = bvec->bv_len;
-       } else {
-               io_req->buffer = NULL;
-               io_req->length = blk_rq_bytes(req);
-       }
-
        io_req->sectorsize = SECTOR_SIZE;
        io_req->fds[1] = dev->fd;
-       io_req->cow_offset = -1;
-       io_req->offset = off;
-       io_req->sector_mask = 0;
+       io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
        io_req->offsets[0] = 0;
        io_req->offsets[1] = dev->cow.data_offset;
 
-       if (dev->cow.file)
-               cowify_req(io_req, dev->cow.bitmap,
-                          dev->cow.bitmap_offset, dev->cow.bitmap_len);
+       for (i = 0 ; i < desc_cnt; i++) {
+               io_req->io_desc[i].sector_mask = 0;
+               io_req->io_desc[i].cow_offset = -1;
+       }
+
+       return io_req;
+}
+
+static int ubd_submit_request(struct ubd *dev, struct request *req)
+{
+       int segs = 0;
+       struct io_thread_req *io_req;
+       int ret;
+       int op = req_op(req);
+
+       if (op == REQ_OP_FLUSH)
+               segs = 0;
+       else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
+               segs = 1;
+       else
+               segs = blk_rq_nr_phys_segments(req);
+
+       io_req = ubd_alloc_req(dev, req, segs);
+       if (!io_req)
+               return -ENOMEM;
+
+       io_req->desc_cnt = segs;
+       if (segs)
+               ubd_map_req(dev, io_req, req);
 
        ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
        if (ret != sizeof(io_req)) {
@@ -1357,22 +1458,6 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
        return ret;
 }
 
-static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req)
-{
-       struct req_iterator iter;
-       struct bio_vec bvec;
-       int ret;
-       u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT;
-
-       rq_for_each_segment(bvec, req, iter) {
-               ret = ubd_queue_one_vec(hctx, req, off, &bvec);
-               if (ret < 0)
-                       return ret;
-               off += bvec.bv_len;
-       }
-       return 0;
-}
-
 static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
                                 const struct blk_mq_queue_data *bd)
 {
@@ -1385,17 +1470,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
        spin_lock_irq(&ubd_dev->lock);
 
        switch (req_op(req)) {
-       /* operations with no lentgth/offset arguments */
        case REQ_OP_FLUSH:
-               ret = ubd_queue_one_vec(hctx, req, 0, NULL);
-               break;
        case REQ_OP_READ:
        case REQ_OP_WRITE:
-               ret = queue_rw_req(hctx, req);
-               break;
        case REQ_OP_DISCARD:
        case REQ_OP_WRITE_ZEROES:
-               ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+               ret = ubd_submit_request(ubd_dev, req);
                break;
        default:
                WARN_ON_ONCE(1);
@@ -1483,22 +1563,22 @@ static int map_error(int error_code)
  * will result in unpredictable behaviour and/or crashes.
  */
 
-static int update_bitmap(struct io_thread_req *req)
+static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
 {
        int n;
 
-       if(req->cow_offset == -1)
+       if (segment->cow_offset == -1)
                return map_error(0);
 
-       n = os_pwrite_file(req->fds[1], &req->bitmap_words,
-                         sizeof(req->bitmap_words), req->cow_offset);
-       if (n != sizeof(req->bitmap_words))
+       n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
+                         sizeof(segment->bitmap_words), segment->cow_offset);
+       if (n != sizeof(segment->bitmap_words))
                return map_error(-n);
 
        return map_error(0);
 }
 
-static void do_io(struct io_thread_req *req)
+static void do_io(struct io_thread_req *req, struct io_desc *desc)
 {
        char *buf = NULL;
        unsigned long len;
@@ -1513,21 +1593,20 @@ static void do_io(struct io_thread_req *req)
                return;
        }
 
-       nsectors = req->length / req->sectorsize;
+       nsectors = desc->length / req->sectorsize;
        start = 0;
        do {
-               bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
+               bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
                end = start;
                while((end < nsectors) &&
-                     (ubd_test_bit(end, (unsigned char *)
-                                   &req->sector_mask) == bit))
+                     (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
                        end++;
 
                off = req->offset + req->offsets[bit] +
                        start * req->sectorsize;
                len = (end - start) * req->sectorsize;
-               if (req->buffer != NULL)
-                       buf = &req->buffer[start * req->sectorsize];
+               if (desc->buffer != NULL)
+                       buf = &desc->buffer[start * req->sectorsize];
 
                switch (req_op(req->req)) {
                case REQ_OP_READ:
@@ -1567,7 +1646,8 @@ static void do_io(struct io_thread_req *req)
                start = end;
        } while(start < nsectors);
 
-       req->error = update_bitmap(req);
+       req->offset += len;
+       req->error = update_bitmap(req, desc);
 }
 
 /* Changed in start_io_thread, which is serialized by being called only
@@ -1600,8 +1680,13 @@ int io_thread(void *arg)
                }
 
                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
+                       struct io_thread_req *req = (*io_req_buffer)[count];
+                       int i;
+
                        io_count++;
-                       do_io((*io_req_buffer)[count]);
+                       for (i = 0; !req->error && i < req->desc_cnt; i++)
+                               do_io(req, &(req->io_desc[i]));
+
                }
 
                written = 0;
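
The ubd rework above replaces per-bvec queueing with a single io_thread_req that carries an array of io_desc segments; the io_thread loop then walks the descriptors and stops at the first error, so later segments of a failed request are never issued. A minimal sketch of that walk, assuming simplified stand-ins for the real io_thread_req/io_desc layouts (their full definitions are outside this hunk):

	/* hedged sketch; only error/desc_cnt/io_desc mirror the hunk above */
	struct io_desc {
		char *buffer;
		unsigned long length;
	};

	struct io_thread_req {
		int error;
		int desc_cnt;
		struct io_desc io_desc[];	/* one entry per bio_vec segment */
	};

	static void do_io(struct io_thread_req *req, struct io_desc *desc)
	{
		/* perform this segment's I/O; set req->error on failure */
	}

	static void process_request(struct io_thread_req *req)
	{
		int i;

		/* the first failing segment ends the walk; the rest are skipped */
		for (i = 0; !req->error && i < req->desc_cnt; i++)
			do_io(req, &req->io_desc[i]);
	}
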
index 555203e..47a02e6 100644 (file)
@@ -1196,9 +1196,9 @@ static int vector_net_close(struct net_device *dev)
 
 /* TX tasklet */
 
-static void vector_tx_poll(unsigned long data)
+static void vector_tx_poll(struct tasklet_struct *t)
 {
-       struct vector_private *vp = (struct vector_private *)data;
+       struct vector_private *vp = from_tasklet(vp, t, tx_poll);
 
        vp->estats.tx_kicks++;
        vector_send(vp->tx_queue);
@@ -1271,7 +1271,7 @@ static int vector_net_open(struct net_device *dev)
                irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
                        IRQ_READ, vector_rx_interrupt,
                        IRQF_SHARED, dev->name, dev);
-       if (err != 0) {
+       if (err < 0) {
                netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
                err = -ENETUNREACH;
                goto out_close;
@@ -1286,7 +1286,7 @@ static int vector_net_open(struct net_device *dev)
                        irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
                                IRQ_WRITE, vector_tx_interrupt,
                                IRQF_SHARED, dev->name, dev);
-               if (err != 0) {
+               if (err < 0) {
                        netdev_err(dev,
                                "vector_open: failed to get tx irq(%d)\n", err);
                        err = -ENETUNREACH;
@@ -1629,7 +1629,7 @@ static void vector_eth_configure(
        });
 
        dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
-       tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
+       tasklet_setup(&vp->tx_poll, vector_tx_poll);
        INIT_WORK(&vp->reset_tx, vector_reset_tx);
 
        timer_setup(&vp->tl, vector_timer_expire, 0);
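
The vector driver changes above follow the generic tasklet API migration: tasklet_init() with an unsigned long cookie becomes tasklet_setup(), and the callback recovers its private structure with from_tasklet(), a type-safe container_of() keyed on the tasklet member. A self-contained sketch of the pattern (demo_priv and its fields are illustrative):

	#include <linux/interrupt.h>

	struct demo_priv {
		struct tasklet_struct tx_poll;
		unsigned long kicks;
	};

	/* the callback now receives the tasklet itself ... */
	static void demo_tx_poll(struct tasklet_struct *t)
	{
		/* ... and from_tasklet() maps it back to its container */
		struct demo_priv *vp = from_tasklet(vp, t, tx_poll);

		vp->kicks++;
	}

	static void demo_init(struct demo_priv *vp)
	{
		tasklet_setup(&vp->tx_poll, demo_tx_poll);
		tasklet_schedule(&vp->tx_poll);
	}
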
index a6c4bb6..27e92d3 100644 (file)
 #include <os.h>
 #include "vhost_user.h"
 
-/* Workaround due to a conflict between irq_user.h and irqreturn.h */
-#ifdef IRQ_NONE
-#undef IRQ_NONE
-#endif
-
 #define MAX_SUPPORTED_QUEUE_SIZE       256
 
 #define to_virtio_uml_device(_vdev) \
@@ -55,7 +50,7 @@ struct virtio_uml_device {
        struct platform_device *pdev;
 
        spinlock_t sock_lock;
-       int sock, req_fd;
+       int sock, req_fd, irq;
        u64 features;
        u64 protocol_features;
        u8 status;
@@ -409,12 +404,14 @@ static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
                return rc;
        vu_dev->req_fd = req_fds[0];
 
-       rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ,
+       rc = um_request_irq(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
                            vu_req_interrupt, IRQF_SHARED,
                            vu_dev->pdev->name, vu_dev);
-       if (rc)
+       if (rc < 0)
                goto err_close;
 
+       vu_dev->irq = rc;
+
        rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
                                           req_fds[1]);
        if (rc)
@@ -423,7 +420,7 @@ static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
        goto out;
 
 err_free_irq:
-       um_free_irq(VIRTIO_IRQ, vu_dev);
+       um_free_irq(vu_dev->irq, vu_dev);
 err_close:
        os_close_file(req_fds[0]);
 out:
@@ -802,7 +799,11 @@ static void vu_del_vq(struct virtqueue *vq)
        struct virtio_uml_vq_info *info = vq->priv;
 
        if (info->call_fd >= 0) {
-               um_free_irq(VIRTIO_IRQ, vq);
+               struct virtio_uml_device *vu_dev;
+
+               vu_dev = to_virtio_uml_device(vq->vdev);
+
+               um_free_irq(vu_dev->irq, vq);
                os_close_file(info->call_fd);
        }
 
@@ -852,9 +853,9 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
                return rc;
 
        info->call_fd = call_fds[0];
-       rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
+       rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
                            vu_interrupt, IRQF_SHARED, info->name, vq);
-       if (rc)
+       if (rc < 0)
                goto close_both;
 
        rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
@@ -864,7 +865,7 @@ static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
        goto out;
 
 release_irq:
-       um_free_irq(VIRTIO_IRQ, vq);
+       um_free_irq(vu_dev->irq, vq);
 close_both:
        os_close_file(call_fds[0]);
 out:
@@ -969,7 +970,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
 
 error_setup:
        if (info->call_fd >= 0) {
-               um_free_irq(VIRTIO_IRQ, vq);
+               um_free_irq(vu_dev->irq, vq);
                os_close_file(info->call_fd);
        }
 error_call:
@@ -1078,7 +1079,7 @@ static void virtio_uml_release_dev(struct device *d)
 
        /* might not have been opened due to not negotiating the feature */
        if (vu_dev->req_fd >= 0) {
-               um_free_irq(VIRTIO_IRQ, vu_dev);
+               um_free_irq(vu_dev->irq, vu_dev);
                os_close_file(vu_dev->req_fd);
        }
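
With this series um_request_irq() doubles as an allocator: passing UM_IRQ_ALLOC picks a free IRQ number, and the return value is that number on success or a negative errno on failure. That is why every "if (rc)" check above becomes "if (rc < 0)", and why the result is stored in vu_dev->irq for the matching um_free_irq() calls. A hedged caller sketch; demo_dev and demo_interrupt are illustrative, only the um_request_irq()/um_free_irq() contract comes from the patch:

	#include <irq_kern.h>

	struct demo_dev {
		int irq;
	};

	static irqreturn_t demo_interrupt(int irq, void *dev_id)
	{
		return IRQ_HANDLED;
	}

	static int demo_attach(struct demo_dev *dev, int fd)
	{
		int irq = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ,
					 demo_interrupt, IRQF_SHARED,
					 "demo", dev);

		if (irq < 0)
			return irq;	/* a negative errno, not an IRQ number */

		dev->irq = irq;		/* keep it for the matching free */
		return 0;
	}

	static void demo_detach(struct demo_dev *dev)
	{
		um_free_irq(dev->irq, dev);
	}
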
 
index fc7f1e7..87ca4a4 100644 (file)
@@ -18,6 +18,7 @@
 struct xterm_chan {
        int pid;
        int helper_pid;
+       int chan_fd;
        char *title;
        int device;
        int raw;
@@ -33,6 +34,7 @@ static void *xterm_init(char *str, int device, const struct chan_opts *opts)
                return NULL;
        *data = ((struct xterm_chan) { .pid             = -1,
                                       .helper_pid      = -1,
+                                      .chan_fd         = -1,
                                       .device          = device,
                                       .title           = opts->xterm_title,
                                       .raw             = opts->raw } );
@@ -149,6 +151,7 @@ static int xterm_open(int input, int output, int primary, void *d,
                goto out_kill;
        }
 
+       data->chan_fd = fd;
        new = xterm_fd(fd, &data->helper_pid);
        if (new < 0) {
                err = new;
@@ -206,6 +209,8 @@ static void xterm_close(int fd, void *d)
                os_kill_process(data->helper_pid, 0);
        data->helper_pid = -1;
 
+       if (data->chan_fd != -1)
+               os_close_file(data->chan_fd);
        os_close_file(fd);
 }
 
index d64ef6d..50f11b7 100644 (file)
@@ -51,7 +51,7 @@ int xterm_fd(int socket, int *pid_out)
 
        err = um_request_irq(XTERM_IRQ, socket, IRQ_READ, xterm_interrupt,
                             IRQF_SHARED, "xterm", data);
-       if (err) {
+       if (err < 0) {
                printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, "
                       "err = %d\n",  err);
                ret = err;
index 42c6205..547bff7 100644 (file)
 #define TELNETD_IRQ            12
 #define XTERM_IRQ              13
 #define RANDOM_IRQ             14
-#define VIRTIO_IRQ             15
 
 #ifdef CONFIG_UML_NET_VECTOR
 
-#define VECTOR_BASE_IRQ                (VIRTIO_IRQ + 1)
+#define VECTOR_BASE_IRQ                (RANDOM_IRQ + 1)
 #define VECTOR_IRQ_SPACE       8
 
-#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
+#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
 
 #else
 
-#define LAST_IRQ VIRTIO_IRQ
+#define UM_FIRST_DYN_IRQ (RANDOM_IRQ + 1)
 
 #endif
 
-#define NR_IRQS (LAST_IRQ + 1)
+#define NR_IRQS                        64
 
 #endif
index def3761..39376bb 100644 (file)
@@ -55,12 +55,15 @@ extern unsigned long end_iomem;
 #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define __PAGE_KERNEL_EXEC                                              \
         (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_RO                                               \
+        (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define PAGE_NONE      __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
 #define PAGE_SHARED    __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_COPY      __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_READONLY  __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
 #define PAGE_KERNEL    __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define PAGE_KERNEL_EXEC       __pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RO         __pgprot(__PAGE_KERNEL_RO)
 
 /*
  * The i386 can't do page protection for execute, and considers that the same
diff --git a/arch/um/include/asm/set_memory.h b/arch/um/include/asm/set_memory.h
new file mode 100644 (file)
index 0000000..24266c6
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/set_memory.h>
index f3b03d3..68e45e9 100644 (file)
@@ -28,7 +28,7 @@ struct time_travel_event {
 
 extern enum time_travel_mode time_travel_mode;
 
-void time_travel_sleep(unsigned long long duration);
+void time_travel_sleep(void);
 
 static inline void
 time_travel_set_event_fn(struct time_travel_event *e,
@@ -60,7 +60,7 @@ struct time_travel_event {
 
 #define time_travel_mode TT_MODE_OFF
 
-static inline void time_travel_sleep(unsigned long long duration)
+static inline void time_travel_sleep(void)
 {
 }
 
index 4e99fe0..16a51a8 100644 (file)
@@ -40,3 +40,6 @@ DEFINE(UML_CONFIG_UML_X86, CONFIG_UML_X86);
 #ifdef CONFIG_64BIT
 DEFINE(UML_CONFIG_64BIT, CONFIG_64BIT);
 #endif
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+DEFINE(UML_CONFIG_UML_TIME_TRAVEL_SUPPORT, CONFIG_UML_TIME_TRAVEL_SUPPORT);
+#endif
index 7cd1a10..7807de5 100644 (file)
@@ -8,11 +8,12 @@
 
 #include <linux/interrupt.h>
 #include <asm/ptrace.h>
+#include "irq_user.h"
 
-extern int um_request_irq(unsigned int irq, int fd, int type,
-                         irq_handler_t handler,
-                         unsigned long irqflags,  const char * devname,
-                         void *dev_id);
-void um_free_irq(unsigned int irq, void *dev);
-#endif
+#define UM_IRQ_ALLOC   -1
 
+int um_request_irq(int irq, int fd, enum um_irq_type type,
+                  irq_handler_t handler, unsigned long irqflags,
+                  const char *devname, void *dev_id);
+void um_free_irq(int irq, void *dev_id);
+#endif
index 107751d..07239e8 100644 (file)
@@ -9,25 +9,12 @@
 #include <sysdep/ptrace.h>
 #include <stdbool.h>
 
-struct irq_fd {
-       struct irq_fd *next;
-       void *id;
-       int fd;
-       int type;
-       int irq;
-       int events;
-       bool active;
-       bool pending;
-       bool purge;
+enum um_irq_type {
+       IRQ_READ,
+       IRQ_WRITE,
+       NUM_IRQ_TYPES,
 };
 
-#define IRQ_READ  0
-#define IRQ_WRITE 1
-#define IRQ_NONE 2
-#define MAX_IRQ_TYPE (IRQ_NONE + 1)
-
-
-
 struct siginfo;
 extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
 extern void free_irq_by_fd(int fd);
index ccafb62..d8c279e 100644 (file)
@@ -19,7 +19,7 @@ extern int kmalloc_ok;
 #define UML_ROUND_UP(addr) \
        ((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK)
 
-extern unsigned long alloc_stack(int order, int atomic);
+extern unsigned long alloc_stack(int atomic);
 extern void free_stack(unsigned long stack, int order);
 
 struct pt_regs;
@@ -39,6 +39,8 @@ extern int is_syscall(unsigned long addr);
 
 extern void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
 
+extern void uml_pm_wake(void);
+
 extern int start_uml(void);
 extern void paging_init(void);
 
@@ -66,5 +68,6 @@ extern void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs);
 extern void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
 extern void fatal_sigsegv(void) __attribute__ ((noreturn));
 
+void um_idle_sleep(void);
 
 #endif
index f467d28..13d86f9 100644 (file)
@@ -233,6 +233,7 @@ extern void timer_set_signal_handler(void);
 extern void set_sigstack(void *sig_stack, int size);
 extern void remove_sigstack(void);
 extern void set_handler(int sig);
+extern void send_sigio_to_self(void);
 extern int change_sig(int signal, int on);
 extern void block_signals(void);
 extern void unblock_signals(void);
@@ -241,6 +242,7 @@ extern int set_signals(int enable);
 extern int set_signals_trace(int enable);
 extern int os_is_signal_stack(void);
 extern void deliver_alarm(void);
+extern void register_pm_wake_signal(void);
 
 /* util.c */
 extern void stack_protections(unsigned long address);
@@ -256,7 +258,7 @@ extern void os_warn(const char *fmt, ...)
        __attribute__ ((format (printf, 1, 2)));
 
 /* time.c */
-extern void os_idle_sleep(unsigned long long nsecs);
+extern void os_idle_sleep(void);
 extern int os_timer_create(void);
 extern int os_timer_set_interval(unsigned long long nsecs);
 extern int os_timer_one_shot(unsigned long long nsecs);
@@ -299,19 +301,29 @@ extern void reboot_skas(void);
 extern int os_waiting_for_events_epoll(void);
 extern void *os_epoll_get_data_pointer(int index);
 extern int os_epoll_triggered(int index, int events);
-extern int os_event_mask(int irq_type);
+extern int os_event_mask(enum um_irq_type irq_type);
 extern int os_setup_epoll(void);
 extern int os_add_epoll_fd(int events, int fd, void *data);
 extern int os_mod_epoll_fd(int events, int fd, void *data);
 extern int os_del_epoll_fd(int fd);
 extern void os_set_ioignore(void);
 extern void os_close_epoll_fd(void);
+extern void um_irqs_suspend(void);
+extern void um_irqs_resume(void);
 
 /* sigio.c */
 extern int add_sigio_fd(int fd);
 extern int ignore_sigio_fd(int fd);
-extern void maybe_sigio_broken(int fd, int read);
-extern void sigio_broken(int fd, int read);
+extern void maybe_sigio_broken(int fd);
+extern void sigio_broken(int fd);
+/*
+ * Unlocked versions for use by the IRQ controller code.
+ *
+ * These are safe because they are only called at suspend/resume, when
+ * nothing else is running.
+ */
+extern int __add_sigio_fd(int fd);
+extern int __ignore_sigio_fd(int fd);
 
 /* prctl.c */
 extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
@@ -330,4 +342,7 @@ extern void unblock_signals_trace(void);
 extern void um_trace_signals_on(void);
 extern void um_trace_signals_off(void);
 
+/* time-travel */
+extern void deliver_time_travel_irqs(void);
+
 #endif
index 3577118..3741d23 100644 (file)
 #include <kern_util.h>
 #include <os.h>
 #include <irq_user.h>
+#include <irq_kern.h>
+#include <as-layout.h>
 
 
 extern void free_irqs(void);
 
 /* When epoll triggers we do not know why it did so
  * we can also have different IRQs for read and write.
- * This is why we keep a small irq_fd array for each fd -
+ * This is why we keep a small irq_reg array for each fd -
  * one entry per IRQ type
  */
+struct irq_reg {
+       void *id;
+       int irq;
+       /* it's cheaper to store this than to query it */
+       int events;
+       bool active;
+       bool pending;
+       bool wakeup;
+};
 
 struct irq_entry {
-       struct irq_entry *next;
+       struct list_head list;
        int fd;
-       struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];
+       struct irq_reg reg[NUM_IRQ_TYPES];
+       bool suspended;
+       bool sigio_workaround;
 };
 
-static struct irq_entry *active_fds;
-
 static DEFINE_SPINLOCK(irq_lock);
+static LIST_HEAD(active_fds);
+static DECLARE_BITMAP(irqs_allocated, NR_IRQS);
 
-static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
+static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
 {
 /*
  * irq->active guards against reentry
@@ -49,23 +62,27 @@ static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
  */
        if (irq->active) {
                irq->active = false;
+
                do {
                        irq->pending = false;
                        do_IRQ(irq->irq, regs);
-               } while (irq->pending && (!irq->purge));
-               if (!irq->purge)
-                       irq->active = true;
+               } while (irq->pending);
+
+               irq->active = true;
        } else {
                irq->pending = true;
        }
 }
 
+void sigio_handler_suspend(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+{
+       /* nothing */
+}
+
 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
        struct irq_entry *irq_entry;
-       struct irq_fd *irq;
-
-       int n, i, j;
+       int n, i;
 
        while (1) {
                /* This is now lockless - epoll keeps back-references to the irqs
@@ -84,21 +101,18 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
                }
 
                for (i = 0; i < n ; i++) {
-                       /* Epoll back reference is the entry with 3 irq_fd
-                        * leaves - one for each irq type.
-                        */
-                       irq_entry = (struct irq_entry *)
-                               os_epoll_get_data_pointer(i);
-                       for (j = 0; j < MAX_IRQ_TYPE ; j++) {
-                               irq = irq_entry->irq_array[j];
-                               if (irq == NULL)
+                       enum um_irq_type t;
+
+                       irq_entry = os_epoll_get_data_pointer(i);
+
+                       for (t = 0; t < NUM_IRQ_TYPES; t++) {
+                               int events = irq_entry->reg[t].events;
+
+                               if (!events)
                                        continue;
-                               if (os_epoll_triggered(i, irq->events) > 0)
-                                       irq_io_loop(irq, regs);
-                               if (irq->purge) {
-                                       irq_entry->irq_array[j] = NULL;
-                                       kfree(irq);
-                               }
+
+                               if (os_epoll_triggered(i, events) > 0)
+                                       irq_io_loop(&irq_entry->reg[t], regs);
                        }
                }
        }
@@ -106,32 +120,59 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
        free_irqs();
 }
 
-static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
+static struct irq_entry *get_irq_entry_by_fd(int fd)
 {
-       int i;
-       int events = 0;
-       struct irq_fd *irq;
+       struct irq_entry *walk;
 
-       for (i = 0; i < MAX_IRQ_TYPE ; i++) {
-               irq = irq_entry->irq_array[i];
-               if (irq != NULL)
-                       events = irq->events | events;
-       }
-       if (events > 0) {
-       /* os_add_epoll will call os_mod_epoll if this already exists */
-               return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+       lockdep_assert_held(&irq_lock);
+
+       list_for_each_entry(walk, &active_fds, list) {
+               if (walk->fd == fd)
+                       return walk;
        }
-       /* No events - delete */
-       return os_del_epoll_fd(irq_entry->fd);
+
+       return NULL;
 }
 
+static void free_irq_entry(struct irq_entry *to_free, bool remove)
+{
+       if (!to_free)
+               return;
+
+       if (remove)
+               os_del_epoll_fd(to_free->fd);
+       list_del(&to_free->list);
+       kfree(to_free);
+}
+
+static bool update_irq_entry(struct irq_entry *entry)
+{
+       enum um_irq_type i;
+       int events = 0;
+
+       for (i = 0; i < NUM_IRQ_TYPES; i++)
+               events |= entry->reg[i].events;
+
+       if (events) {
+               /* will modify (instead of add) if needed */
+               os_add_epoll_fd(events, entry->fd, entry);
+               return true;
+       }
+
+       os_del_epoll_fd(entry->fd);
+       return false;
+}
 
+static void update_or_free_irq_entry(struct irq_entry *entry)
+{
+       if (!update_irq_entry(entry))
+               free_irq_entry(entry, false);
+}
 
-static int activate_fd(int irq, int fd, int type, void *dev_id)
+static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id)
 {
-       struct irq_fd *new_fd;
        struct irq_entry *irq_entry;
-       int i, err, events;
+       int err, events = os_event_mask(type);
        unsigned long flags;
 
        err = os_set_fd_async(fd);
@@ -139,73 +180,34 @@ static int activate_fd(int irq, int fd, int type, void *dev_id)
                goto out;
 
        spin_lock_irqsave(&irq_lock, flags);
-
-       /* Check if we have an entry for this fd */
-
-       err = -EBUSY;
-       for (irq_entry = active_fds;
-               irq_entry != NULL; irq_entry = irq_entry->next) {
-               if (irq_entry->fd == fd)
-                       break;
-       }
-
-       if (irq_entry == NULL) {
-               /* This needs to be atomic as it may be called from an
-                * IRQ context.
-                */
-               irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
-               if (irq_entry == NULL) {
-                       printk(KERN_ERR
-                               "Failed to allocate new IRQ entry\n");
+       irq_entry = get_irq_entry_by_fd(fd);
+       if (irq_entry) {
+               /* cannot register the same FD twice with the same type */
+               if (WARN_ON(irq_entry->reg[type].events)) {
+                       err = -EALREADY;
                        goto out_unlock;
                }
-               irq_entry->fd = fd;
-               for (i = 0; i < MAX_IRQ_TYPE; i++)
-                       irq_entry->irq_array[i] = NULL;
-               irq_entry->next = active_fds;
-               active_fds = irq_entry;
-       }
-
-       /* Check if we are trying to re-register an interrupt for a
-        * particular fd
-        */
 
-       if (irq_entry->irq_array[type] != NULL) {
-               printk(KERN_ERR
-                       "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
-                       irq, fd, type, dev_id
-               );
-               goto out_unlock;
+               /* temporarily disable to avoid IRQ-side locking */
+               os_del_epoll_fd(fd);
        } else {
-               /* New entry for this fd */
-
-               err = -ENOMEM;
-               new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC);
-               if (new_fd == NULL)
+               irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
+               if (!irq_entry) {
+                       err = -ENOMEM;
                        goto out_unlock;
-
-               events = os_event_mask(type);
-
-               *new_fd = ((struct irq_fd) {
-                       .id             = dev_id,
-                       .irq            = irq,
-                       .type           = type,
-                       .events         = events,
-                       .active         = true,
-                       .pending        = false,
-                       .purge          = false
-               });
-               /* Turn off any IO on this fd - allows us to
-                * avoid locking the IRQ loop
-                */
-               os_del_epoll_fd(irq_entry->fd);
-               irq_entry->irq_array[type] = new_fd;
+               }
+               irq_entry->fd = fd;
+               list_add_tail(&irq_entry->list, &active_fds);
+               maybe_sigio_broken(fd);
        }
 
-       /* Turn back IO on with the correct (new) IO event mask */
-       assign_epoll_events_to_irq(irq_entry);
+       irq_entry->reg[type].id = dev_id;
+       irq_entry->reg[type].irq = irq;
+       irq_entry->reg[type].active = true;
+       irq_entry->reg[type].events = events;
+
+       WARN_ON(!update_irq_entry(irq_entry));
        spin_unlock_irqrestore(&irq_lock, flags);
-       maybe_sigio_broken(fd, (type != IRQ_NONE));
 
        return 0;
 out_unlock:
@@ -215,104 +217,10 @@ out:
 }
 
 /*
- * Walk the IRQ list and dispose of any unused entries.
- * Should be done under irq_lock.
+ * Remove the entry or entries for a specific FD; if you
+ * don't want to remove all the possible entries, use
+ * um_free_irq() or deactivate_fd() instead.
  */
-
-static void garbage_collect_irq_entries(void)
-{
-       int i;
-       bool reap;
-       struct irq_entry *walk;
-       struct irq_entry *previous = NULL;
-       struct irq_entry *to_free;
-
-       if (active_fds == NULL)
-               return;
-       walk = active_fds;
-       while (walk != NULL) {
-               reap = true;
-               for (i = 0; i < MAX_IRQ_TYPE ; i++) {
-                       if (walk->irq_array[i] != NULL) {
-                               reap = false;
-                               break;
-                       }
-               }
-               if (reap) {
-                       if (previous == NULL)
-                               active_fds = walk->next;
-                       else
-                               previous->next = walk->next;
-                       to_free = walk;
-               } else {
-                       to_free = NULL;
-               }
-               walk = walk->next;
-               kfree(to_free);
-       }
-}
-
-/*
- * Walk the IRQ list and get the descriptor for our FD
- */
-
-static struct irq_entry *get_irq_entry_by_fd(int fd)
-{
-       struct irq_entry *walk = active_fds;
-
-       while (walk != NULL) {
-               if (walk->fd == fd)
-                       return walk;
-               walk = walk->next;
-       }
-       return NULL;
-}
-
-
-/*
- * Walk the IRQ list and dispose of an entry for a specific
- * device, fd and number. Note - if sharing an IRQ for read
- * and writefor the same FD it will be disposed in either case.
- * If this behaviour is undesirable use different IRQ ids.
- */
-
-#define IGNORE_IRQ 1
-#define IGNORE_DEV (1<<1)
-
-static void do_free_by_irq_and_dev(
-       struct irq_entry *irq_entry,
-       unsigned int irq,
-       void *dev,
-       int flags
-)
-{
-       int i;
-       struct irq_fd *to_free;
-
-       for (i = 0; i < MAX_IRQ_TYPE ; i++) {
-               if (irq_entry->irq_array[i] != NULL) {
-                       if (
-                       ((flags & IGNORE_IRQ) ||
-                               (irq_entry->irq_array[i]->irq == irq)) &&
-                       ((flags & IGNORE_DEV) ||
-                               (irq_entry->irq_array[i]->id == dev))
-                       ) {
-                               /* Turn off any IO on this fd - allows us to
-                                * avoid locking the IRQ loop
-                                */
-                               os_del_epoll_fd(irq_entry->fd);
-                               to_free = irq_entry->irq_array[i];
-                               irq_entry->irq_array[i] = NULL;
-                               assign_epoll_events_to_irq(irq_entry);
-                               if (to_free->active)
-                                       to_free->purge = true;
-                               else
-                                       kfree(to_free);
-                       }
-               }
-       }
-}
-
 void free_irq_by_fd(int fd)
 {
        struct irq_entry *to_free;
@@ -320,58 +228,64 @@ void free_irq_by_fd(int fd)
 
        spin_lock_irqsave(&irq_lock, flags);
        to_free = get_irq_entry_by_fd(fd);
-       if (to_free != NULL) {
-               do_free_by_irq_and_dev(
-                       to_free,
-                       -1,
-                       NULL,
-                       IGNORE_IRQ | IGNORE_DEV
-               );
-       }
-       garbage_collect_irq_entries();
+       free_irq_entry(to_free, true);
        spin_unlock_irqrestore(&irq_lock, flags);
 }
 EXPORT_SYMBOL(free_irq_by_fd);
 
 static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 {
-       struct irq_entry *to_free;
+       struct irq_entry *entry;
        unsigned long flags;
 
        spin_lock_irqsave(&irq_lock, flags);
-       to_free = active_fds;
-       while (to_free != NULL) {
-               do_free_by_irq_and_dev(
-                       to_free,
-                       irq,
-                       dev,
-                       0
-               );
-               to_free = to_free->next;
+       list_for_each_entry(entry, &active_fds, list) {
+               enum um_irq_type i;
+
+               for (i = 0; i < NUM_IRQ_TYPES; i++) {
+                       struct irq_reg *reg = &entry->reg[i];
+
+                       if (!reg->events)
+                               continue;
+                       if (reg->irq != irq)
+                               continue;
+                       if (reg->id != dev)
+                               continue;
+
+                       os_del_epoll_fd(entry->fd);
+                       reg->events = 0;
+                       update_or_free_irq_entry(entry);
+                       goto out;
+               }
        }
-       garbage_collect_irq_entries();
+out:
        spin_unlock_irqrestore(&irq_lock, flags);
 }
 
-
 void deactivate_fd(int fd, int irqnum)
 {
-       struct irq_entry *to_free;
+       struct irq_entry *entry;
        unsigned long flags;
+       enum um_irq_type i;
 
        os_del_epoll_fd(fd);
+
        spin_lock_irqsave(&irq_lock, flags);
-       to_free = get_irq_entry_by_fd(fd);
-       if (to_free != NULL) {
-               do_free_by_irq_and_dev(
-                       to_free,
-                       irqnum,
-                       NULL,
-                       IGNORE_DEV
-               );
+       entry = get_irq_entry_by_fd(fd);
+       if (!entry)
+               goto out;
+
+       for (i = 0; i < NUM_IRQ_TYPES; i++) {
+               if (!entry->reg[i].events)
+                       continue;
+               if (entry->reg[i].irq == irqnum)
+                       entry->reg[i].events = 0;
        }
-       garbage_collect_irq_entries();
+
+       update_or_free_irq_entry(entry);
+out:
        spin_unlock_irqrestore(&irq_lock, flags);
+
        ignore_sigio_fd(fd);
 }
 EXPORT_SYMBOL(deactivate_fd);
@@ -384,24 +298,17 @@ EXPORT_SYMBOL(deactivate_fd);
  */
 int deactivate_all_fds(void)
 {
-       struct irq_entry *to_free;
+       struct irq_entry *entry;
 
        /* Stop IO. The IRQ loop has no lock so this is our
         * only way of making sure we are safe to dispose
         * of all IRQ handlers
         */
        os_set_ioignore();
-       to_free = active_fds;
-       while (to_free != NULL) {
-               do_free_by_irq_and_dev(
-                       to_free,
-                       -1,
-                       NULL,
-                       IGNORE_IRQ | IGNORE_DEV
-               );
-               to_free = to_free->next;
-       }
-       /* don't garbage collect - we can no longer call kfree() here */
+
+       /* we can no longer call kfree() here so just deactivate */
+       list_for_each_entry(entry, &active_fds, list)
+               os_del_epoll_fd(entry->fd);
        os_close_epoll_fd();
        return 0;
 }
@@ -421,31 +328,146 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
        return 1;
 }
 
-void um_free_irq(unsigned int irq, void *dev)
+void um_free_irq(int irq, void *dev)
 {
+       if (WARN(irq < 0 || irq >= NR_IRQS, "freeing invalid irq %d", irq))
+               return;
+
        free_irq_by_irq_and_dev(irq, dev);
        free_irq(irq, dev);
+       clear_bit(irq, irqs_allocated);
 }
 EXPORT_SYMBOL(um_free_irq);
 
-int um_request_irq(unsigned int irq, int fd, int type,
-                  irq_handler_t handler,
-                  unsigned long irqflags, const char * devname,
-                  void *dev_id)
+int um_request_irq(int irq, int fd, enum um_irq_type type,
+                  irq_handler_t handler, unsigned long irqflags,
+                  const char *devname, void *dev_id)
 {
        int err;
 
+       if (irq == UM_IRQ_ALLOC) {
+               int i;
+
+               for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) {
+                       if (!test_and_set_bit(i, irqs_allocated)) {
+                               irq = i;
+                               break;
+                       }
+               }
+       }
+
+       if (irq < 0)
+               return -ENOSPC;
+
        if (fd != -1) {
                err = activate_fd(irq, fd, type, dev_id);
                if (err)
-                       return err;
+                       goto error;
        }
 
-       return request_irq(irq, handler, irqflags, devname, dev_id);
-}
+       err = request_irq(irq, handler, irqflags, devname, dev_id);
+       if (err < 0)
+               goto error;
 
+       return irq;
+error:
+       clear_bit(irq, irqs_allocated);
+       return err;
+}
 EXPORT_SYMBOL(um_request_irq);
 
+#ifdef CONFIG_PM_SLEEP
+void um_irqs_suspend(void)
+{
+       struct irq_entry *entry;
+       unsigned long flags;
+
+       sig_info[SIGIO] = sigio_handler_suspend;
+
+       spin_lock_irqsave(&irq_lock, flags);
+       list_for_each_entry(entry, &active_fds, list) {
+               enum um_irq_type t;
+               bool wake = false;
+
+               for (t = 0; t < NUM_IRQ_TYPES; t++) {
+                       if (!entry->reg[t].events)
+                               continue;
+
+                       /*
+                        * For the SIGIO_WRITE_IRQ, which is used to handle the
+                        * SIGIO workaround thread, we need special handling:
+                        * enable wake for it itself, but below we tell it about
+                        * any FDs that should be suspended.
+                        */
+                       if (entry->reg[t].wakeup ||
+                           entry->reg[t].irq == SIGIO_WRITE_IRQ) {
+                               wake = true;
+                               break;
+                       }
+               }
+
+               if (!wake) {
+                       entry->suspended = true;
+                       os_clear_fd_async(entry->fd);
+                       entry->sigio_workaround =
+                               !__ignore_sigio_fd(entry->fd);
+               }
+       }
+       spin_unlock_irqrestore(&irq_lock, flags);
+}
+
+void um_irqs_resume(void)
+{
+       struct irq_entry *entry;
+       unsigned long flags;
+
+       spin_lock_irqsave(&irq_lock, flags);
+       list_for_each_entry(entry, &active_fds, list) {
+               if (entry->suspended) {
+                       int err = os_set_fd_async(entry->fd);
+
+                       WARN(err < 0, "os_set_fd_async returned %d\n", err);
+                       entry->suspended = false;
+
+                       if (entry->sigio_workaround) {
+                               err = __add_sigio_fd(entry->fd);
+                               WARN(err < 0, "__add_sigio_fd returned %d\n", err);
+                       }
+               }
+       }
+       spin_unlock_irqrestore(&irq_lock, flags);
+
+       sig_info[SIGIO] = sigio_handler;
+       send_sigio_to_self();
+}
+
+static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
+{
+       struct irq_entry *entry;
+       unsigned long flags;
+
+       spin_lock_irqsave(&irq_lock, flags);
+       list_for_each_entry(entry, &active_fds, list) {
+               enum um_irq_type t;
+
+               for (t = 0; t < NUM_IRQ_TYPES; t++) {
+                       if (!entry->reg[t].events)
+                               continue;
+
+                       if (entry->reg[t].irq != d->irq)
+                               continue;
+                       entry->reg[t].wakeup = on;
+                       goto unlock;
+               }
+       }
+unlock:
+       spin_unlock_irqrestore(&irq_lock, flags);
+       return 0;
+}
+#else
+#define normal_irq_set_wake NULL
+#endif
+
 /*
  * irq_chip must define at least enable/disable and ack when
  * the edge handler is used.
@@ -454,7 +476,7 @@ static void dummy(struct irq_data *d)
 {
 }
 
-/* This is used for everything else than the timer. */
+/* This is used for everything other than the timer. */
 static struct irq_chip normal_irq_type = {
        .name = "SIGIO",
        .irq_disable = dummy,
@@ -462,10 +484,11 @@ static struct irq_chip normal_irq_type = {
        .irq_ack = dummy,
        .irq_mask = dummy,
        .irq_unmask = dummy,
+       .irq_set_wake = normal_irq_set_wake,
 };
 
-static struct irq_chip SIGVTALRM_irq_type = {
-       .name = "SIGVTALRM",
+static struct irq_chip alarm_irq_type = {
+       .name = "SIGALRM",
        .irq_disable = dummy,
        .irq_enable = dummy,
        .irq_ack = dummy,
@@ -477,10 +500,9 @@ void __init init_IRQ(void)
 {
        int i;
 
-       irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
-
+       irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);
 
-       for (i = 1; i <= LAST_IRQ; i++)
+       for (i = 1; i < NR_IRQS; i++)
                irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
        /* Initialize EPOLL Loop */
        os_setup_epoll();
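
normal_irq_set_wake() ties the generic wake framework to the per-registration wakeup flag: during um_irqs_suspend(), any FD whose registrations are neither marked for wakeup nor attached to the SIGIO workaround IRQ gets quiesced. A driver opts an interrupt into suspend wakeup with the standard helper; a minimal sketch:

	#include <linux/interrupt.h>

	/* arm a previously requested UML IRQ as a wake source; this ends up
	 * in normal_irq_set_wake() above and sets reg->wakeup for the fd */
	static int demo_arm_wake(int irq)
	{
		return enable_irq_wake(irq);	/* undo with disable_irq_wake() */
	}
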
index 0fcdc37..2a986ec 100644 (file)
@@ -32,6 +32,7 @@
 #include <os.h>
 #include <skas.h>
 #include <linux/time-internal.h>
+#include <asm/set_memory.h>
 
 /*
  * This is a per-cpu array.  A processor only modifies its entry and it only
@@ -62,16 +63,18 @@ void free_stack(unsigned long stack, int order)
        free_pages(stack, order);
 }
 
-unsigned long alloc_stack(int order, int atomic)
+unsigned long alloc_stack(int atomic)
 {
-       unsigned long page;
+       unsigned long addr;
        gfp_t flags = GFP_KERNEL;
 
        if (atomic)
                flags = GFP_ATOMIC;
-       page = __get_free_pages(flags, order);
+       addr = __get_free_pages(flags, 1);
 
-       return page;
+       set_memory_ro(addr, 1);
+
+       return addr + PAGE_SIZE;
 }
 
 static inline void set_current(struct task_struct *task)
@@ -203,15 +206,12 @@ void initial_thread_cb(void (*proc)(void *), void *arg)
        kmalloc_ok = save_kmalloc_ok;
 }
 
-static void um_idle_sleep(void)
+void um_idle_sleep(void)
 {
-       unsigned long long duration = UM_NSEC_PER_SEC;
-
-       if (time_travel_mode != TT_MODE_OFF) {
-               time_travel_sleep(duration);
-       } else {
-               os_idle_sleep(duration);
-       }
+       if (time_travel_mode != TT_MODE_OFF)
+               time_travel_sleep();
+       else
+               os_idle_sleep();
 }
 
 void arch_cpu_idle(void)
index d1cffc2..5085a50 100644 (file)
@@ -25,7 +25,7 @@ int write_sigio_irq(int fd)
 
        err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt,
                             0, "write sigio", NULL);
-       if (err) {
+       if (err < 0) {
                printk(KERN_ERR "write_sigio_irq : um_request_irq failed, "
                       "err = %d\n", err);
                return -1;
index 3d109ff..f4db89b 100644 (file)
@@ -31,6 +31,7 @@ static bool time_travel_start_set;
 static unsigned long long time_travel_start;
 static unsigned long long time_travel_time;
 static LIST_HEAD(time_travel_events);
+static LIST_HEAD(time_travel_irqs);
 static unsigned long long time_travel_timer_interval;
 static unsigned long long time_travel_next_event;
 static struct time_travel_event time_travel_timer_event;
@@ -46,6 +47,9 @@ static void time_travel_set_time(unsigned long long ns)
        if (unlikely(ns < time_travel_time))
                panic("time-travel: time goes backwards %lld -> %lld\n",
                      time_travel_time, ns);
+       else if (unlikely(ns >= S64_MAX))
+               panic("The system was going to sleep forever, aborting");
+
        time_travel_time = ns;
 }
 
@@ -180,6 +184,14 @@ static void time_travel_ext_update_request(unsigned long long time)
            time == time_travel_ext_prev_request)
                return;
 
+       /*
+        * if we're running and are allowed to run past the request
+        * then we don't need to update it either
+        */
+       if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
+           time < time_travel_ext_free_until)
+               return;
+
        time_travel_ext_prev_request = time;
        time_travel_ext_prev_request_valid = true;
        time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
@@ -187,7 +199,13 @@ static void time_travel_ext_update_request(unsigned long long time)
 
 void __time_travel_propagate_time(void)
 {
+       static unsigned long long last_propagated;
+
+       if (last_propagated == time_travel_time)
+               return;
+
        time_travel_ext_req(UM_TIMETRAVEL_UPDATE, time_travel_time);
+       last_propagated = time_travel_time;
 }
 EXPORT_SYMBOL_GPL(__time_travel_propagate_time);
 
@@ -214,6 +232,7 @@ static void time_travel_ext_wait(bool idle)
        };
 
        time_travel_ext_prev_request_valid = false;
+       time_travel_ext_free_until_valid = false;
        time_travel_ext_waiting++;
 
        time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
@@ -260,11 +279,6 @@ static void __time_travel_add_event(struct time_travel_event *e,
        struct time_travel_event *tmp;
        bool inserted = false;
 
-       if (WARN(time_travel_mode == TT_MODE_BASIC &&
-                e != &time_travel_timer_event,
-                "only timer events can be handled in basic mode"))
-               return;
-
        if (e->pending)
                return;
 
@@ -311,6 +325,35 @@ void time_travel_periodic_timer(struct time_travel_event *e)
        deliver_alarm();
 }
 
+void deliver_time_travel_irqs(void)
+{
+       struct time_travel_event *e;
+       unsigned long flags;
+
+       /*
+        * Don't do anything in the common case. Because we have to disable
+        * IRQs here (and re-enable them later) we'll actually recurse at the
+        * end of the function, so this early return is strictly necessary.
+        */
+       if (likely(list_empty(&time_travel_irqs)))
+               return;
+
+       local_irq_save(flags);
+       irq_enter();
+       while ((e = list_first_entry_or_null(&time_travel_irqs,
+                                            struct time_travel_event,
+                                            list))) {
+               WARN(e->time != time_travel_time,
+                    "time moved from %lld to %lld before IRQ delivery\n",
+                    time_travel_time, e->time);
+               list_del(&e->list);
+               e->pending = false;
+               e->fn(e);
+       }
+       irq_exit();
+       local_irq_restore(flags);
+}
+
 static void time_travel_deliver_event(struct time_travel_event *e)
 {
        if (e == &time_travel_timer_event) {
@@ -319,6 +362,14 @@ static void time_travel_deliver_event(struct time_travel_event *e)
                 * by itself, so must handle it specially here
                 */
                e->fn(e);
+       } else if (irqs_disabled()) {
+               list_add_tail(&e->list, &time_travel_irqs);
+               /*
+                * set pending again, it was set to false when the
+                * event was deleted from the original list, but
+                * now it's still pending until we deliver the IRQ.
+                */
+               e->pending = true;
        } else {
                unsigned long flags;
 
@@ -404,9 +455,14 @@ static void time_travel_oneshot_timer(struct time_travel_event *e)
        deliver_alarm();
 }
 
-void time_travel_sleep(unsigned long long duration)
+void time_travel_sleep(void)
 {
-       unsigned long long next = time_travel_time + duration;
+       /*
+        * Wait "forever" (using S64_MAX because there are some potential
+        * wrapping issues, especially with the current TT_MODE_EXTERNAL
+        * controller application.
+        */
+       unsigned long long next = S64_MAX;
 
        if (time_travel_mode == TT_MODE_BASIC)
                os_timer_disable();
@@ -483,6 +539,7 @@ invalid_number:
 #define time_travel_start_set 0
 #define time_travel_start 0
 #define time_travel_time 0
+#define time_travel_ext_waiting 0
 
 static inline void time_travel_update_time(unsigned long long ns, bool retearly)
 {
@@ -628,7 +685,8 @@ static u64 timer_read(struct clocksource *cs)
                 * "what do I do next" and onstack event we use to know when
                 * to return from time_travel_update_time().
                 */
-               if (!irqs_disabled() && !in_interrupt() && !in_softirq())
+               if (!irqs_disabled() && !in_interrupt() && !in_softirq() &&
+                   !time_travel_ext_waiting)
                        time_travel_update_time(time_travel_time +
                                                TIMER_MULTIPLIER,
                                                false);
@@ -673,10 +731,8 @@ void read_persistent_clock64(struct timespec64 *ts)
 {
        long long nsecs;
 
-       if (time_travel_start_set)
+       if (time_travel_mode != TT_MODE_OFF)
                nsecs = time_travel_start + time_travel_time;
-       else if (time_travel_mode == TT_MODE_EXTERNAL)
-               nsecs = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
        else
                nsecs = os_persistent_clock_emulation();
 
@@ -686,6 +742,25 @@ void read_persistent_clock64(struct timespec64 *ts)
 
 void __init time_init(void)
 {
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+       switch (time_travel_mode) {
+       case TT_MODE_EXTERNAL:
+               time_travel_start = time_travel_ext_req(UM_TIMETRAVEL_GET_TOD, -1);
+               /* controller gave us the *current* time, so adjust by that */
+               time_travel_ext_get_time();
+               time_travel_start -= time_travel_time;
+               break;
+       case TT_MODE_INFCPU:
+       case TT_MODE_BASIC:
+               if (!time_travel_start_set)
+                       time_travel_start = os_persistent_clock_emulation();
+               break;
+       case TT_MODE_OFF:
+               /* we just read the host clock with os_persistent_clock_emulation() */
+               break;
+       }
+#endif
+
        timer_set_signal_handler();
        late_time_init = um_timer_setup;
 }
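
Two far-apart hunks cooperate here: time_travel_deliver_event() parks events that come due while interrupts are disabled on the new time_travel_irqs list, and deliver_time_travel_irqs() drains that list from unblock_signals() (see the arch/um/os-Linux/signal.c hunk further down). A self-contained sketch of the scheme, with tt_event standing in for the real time_travel_event:

	#include <linux/list.h>

	struct tt_event {
		struct list_head list;
		bool pending;
		void (*fn)(struct tt_event *e);
	};

	static LIST_HEAD(tt_irqs);

	/* an event that fires with interrupts off is parked, not delivered */
	static void tt_deliver(struct tt_event *e, bool irqs_off)
	{
		if (irqs_off) {
			list_add_tail(&e->list, &tt_irqs);
			e->pending = true;	/* still owed an IRQ */
		} else {
			e->fn(e);
		}
	}

	/* ... and drained once unblock_signals() re-enables interrupts */
	static void tt_drain(void)
	{
		struct tt_event *e;

		while ((e = list_first_entry_or_null(&tt_irqs,
						     struct tt_event, list))) {
			list_del(&e->list);
			e->pending = false;
			e->fn(e);
		}
	}
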
index 6177679..437d1f1 100644 (file)
@@ -608,3 +608,57 @@ void force_flush_all(void)
                vma = vma->vm_next;
        }
 }
+
+struct page_change_data {
+       unsigned int set_mask, clear_mask;
+};
+
+static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
+{
+       struct page_change_data *cdata = data;
+       pte_t pte = READ_ONCE(*ptep);
+
+       pte_clear_bits(pte, cdata->clear_mask);
+       pte_set_bits(pte, cdata->set_mask);
+
+       set_pte(ptep, pte);
+       return 0;
+}
+
+static int change_memory(unsigned long start, unsigned long pages,
+                        unsigned int set_mask, unsigned int clear_mask)
+{
+       unsigned long size = pages * PAGE_SIZE;
+       struct page_change_data data;
+       int ret;
+
+       data.set_mask = set_mask;
+       data.clear_mask = clear_mask;
+
+       ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+                                 &data);
+
+       flush_tlb_kernel_range(start, start + size);
+
+       return ret;
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+       return change_memory(addr, numpages, 0, _PAGE_RW);
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+       return change_memory(addr, numpages, _PAGE_RW, 0);
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+       return -EOPNOTSUPP;
+}
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+       return -EOPNOTSUPP;
+}
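
change_memory() applies a PTE set/clear mask across a kernel range with apply_to_page_range() and then flushes the TLB, which gives UML just enough of the set_memory_*() API for the read-only stack guard page created in alloc_stack() earlier in this series. A hedged usage sketch:

	#include <linux/gfp.h>
	#include <asm/set_memory.h>

	/* illustrative: write-protect one page, restore it before freeing */
	static void demo_guard_page(void)
	{
		unsigned long page = __get_free_page(GFP_KERNEL);

		if (!page)
			return;

		set_memory_ro(page, 1);	/* writes through this mapping fault */
		/* ... */
		set_memory_rw(page, 1);	/* restore before the page is reused */
		free_page(page);
	}
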
index 76b3729..31d356b 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/sched/task.h>
 #include <linux/kmsg_dump.h>
+#include <linux/suspend.h>
 
 #include <asm/processor.h>
 #include <asm/sections.h>
@@ -377,3 +378,69 @@ void *text_poke(void *addr, const void *opcode, size_t len)
 void text_poke_sync(void)
 {
 }
+
+void uml_pm_wake(void)
+{
+       pm_system_wakeup();
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int um_suspend_valid(suspend_state_t state)
+{
+       return state == PM_SUSPEND_MEM;
+}
+
+static int um_suspend_prepare(void)
+{
+       um_irqs_suspend();
+       return 0;
+}
+
+static int um_suspend_enter(suspend_state_t state)
+{
+       if (WARN_ON(state != PM_SUSPEND_MEM))
+               return -EINVAL;
+
+       /*
+        * This is identical to the idle sleep, but we've just
+        * (during suspend) turned off all interrupt sources
+        * except for the ones we want, so now we can only wake
+        * up on something we actually want to wake up on. All
+        * timing has also been suspended.
+        */
+       um_idle_sleep();
+       return 0;
+}
+
+static void um_suspend_finish(void)
+{
+       um_irqs_resume();
+}
+
+const struct platform_suspend_ops um_suspend_ops = {
+       .valid = um_suspend_valid,
+       .prepare = um_suspend_prepare,
+       .enter = um_suspend_enter,
+       .finish = um_suspend_finish,
+};
+
+static int init_pm_wake_signal(void)
+{
+       /*
+        * In external time-travel mode we can't use signals to wake up
+        * since that would mess with the scheduling. We'll have to do
+        * some additional work to support wakeup on virtio devices or
+        * similar, perhaps implementing a fake RTC controller that can
+        * trigger wakeup (and request the appropriate scheduling from
+        * the external scheduler when going to suspend.)
+        */
+       if (time_travel_mode != TT_MODE_EXTERNAL)
+               register_pm_wake_signal();
+
+       suspend_set_ops(&um_suspend_ops);
+
+       return 0;
+}
+
+late_initcall(init_pm_wake_signal);
+#endif
index 839915b..77ac50b 100644 (file)
@@ -10,6 +10,8 @@ obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \
        registers.o sigio.o signal.o start_up.o time.o tty.o \
        umid.o user_syms.o util.o drivers/ skas/
 
+CFLAGS_signal.o += -Wframe-larger-than=4096
+
 obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o
 
 USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \
index 9fa6e41..feb48d7 100644 (file)
@@ -45,7 +45,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
        unsigned long stack, sp;
        int pid, fds[2], ret, n;
 
-       stack = alloc_stack(0, __cant_sleep());
+       stack = alloc_stack(__cant_sleep());
        if (stack == 0)
                return -ENOMEM;
 
@@ -116,7 +116,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
        unsigned long stack, sp;
        int pid, status, err;
 
-       stack = alloc_stack(0, __cant_sleep());
+       stack = alloc_stack(__cant_sleep());
        if (stack == 0)
                return -ENOMEM;
 
index d508310..98ea910 100644 (file)
@@ -45,10 +45,10 @@ int os_epoll_triggered(int index, int events)
  * access to the right includes/defines for EPOLL constants.
  */
 
-int os_event_mask(int irq_type)
+int os_event_mask(enum um_irq_type irq_type)
 {
        if (irq_type == IRQ_READ)
-               return EPOLLIN | EPOLLPRI;
+               return EPOLLIN | EPOLLPRI | EPOLLERR | EPOLLHUP | EPOLLRDHUP;
        if (irq_type == IRQ_WRITE)
                return EPOLLOUT;
        return 0;
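
Widening the read mask matters because epoll reports EPOLLERR and EPOLLHUP unconditionally, whether or not they were requested, and sigio_handler() now matches triggered events against the stored per-type mask: a hangup arriving without EPOLLIN would otherwise match neither IRQ type and be lost. A small userspace illustration of that epoll behaviour:

	#include <sys/epoll.h>

	/* even though only EPOLLIN|EPOLLPRI is requested here, epoll_wait()
	 * may still report EPOLLERR/EPOLLHUP for this fd; a matching mask
	 * lacking those bits would never recognize the wakeup */
	static int watch_fd(int epfd, int fd, void *entry)
	{
		struct epoll_event ev = {
			.events = EPOLLIN | EPOLLPRI,
			.data.ptr = entry,
		};

		return epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);
	}
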
index 7555808..6597ea1 100644 (file)
@@ -164,45 +164,55 @@ static void update_thread(void)
        set_signals_trace(flags);
 }
 
-int add_sigio_fd(int fd)
+int __add_sigio_fd(int fd)
 {
        struct pollfd *p;
-       int err = 0, i, n;
+       int err, i, n;
 
-       sigio_lock();
        for (i = 0; i < all_sigio_fds.used; i++) {
                if (all_sigio_fds.poll[i].fd == fd)
                        break;
        }
        if (i == all_sigio_fds.used)
-               goto out;
+               return -ENOSPC;
 
        p = &all_sigio_fds.poll[i];
 
        for (i = 0; i < current_poll.used; i++) {
                if (current_poll.poll[i].fd == fd)
-                       goto out;
+                       return 0;
        }
 
        n = current_poll.used;
        err = need_poll(&next_poll, n + 1);
        if (err)
-               goto out;
+               return err;
 
        memcpy(next_poll.poll, current_poll.poll,
               current_poll.used * sizeof(struct pollfd));
        next_poll.poll[n] = *p;
        next_poll.used = n + 1;
        update_thread();
- out:
+
+       return 0;
+}
+
+
+int add_sigio_fd(int fd)
+{
+       int err;
+
+       sigio_lock();
+       err = __add_sigio_fd(fd);
        sigio_unlock();
+
        return err;
 }
 
-int ignore_sigio_fd(int fd)
+int __ignore_sigio_fd(int fd)
 {
        struct pollfd *p;
-       int err = 0, i, n = 0;
+       int err, i, n = 0;
 
        /*
         * This is called from exitcalls elsewhere in UML - if
@@ -212,17 +222,16 @@ int ignore_sigio_fd(int fd)
        if (write_sigio_pid == -1)
                return -EIO;
 
-       sigio_lock();
        for (i = 0; i < current_poll.used; i++) {
                if (current_poll.poll[i].fd == fd)
                        break;
        }
        if (i == current_poll.used)
-               goto out;
+               return -ENOENT;
 
        err = need_poll(&next_poll, current_poll.used - 1);
        if (err)
-               goto out;
+               return err;
 
        for (i = 0; i < current_poll.used; i++) {
                p = &current_poll.poll[i];
@@ -232,8 +241,18 @@ int ignore_sigio_fd(int fd)
        next_poll.used = current_poll.used - 1;
 
        update_thread();
- out:
+
+       return 0;
+}
+
+int ignore_sigio_fd(int fd)
+{
+       int err;
+
+       sigio_lock();
+       err = __ignore_sigio_fd(fd);
        sigio_unlock();
+
        return err;
 }
 
@@ -336,7 +355,7 @@ out_close1:
        close(l_write_sigio_fds[1]);
 }
 
-void sigio_broken(int fd, int read)
+void sigio_broken(int fd)
 {
        int err;
 
@@ -352,7 +371,7 @@ void sigio_broken(int fd, int read)
 
        all_sigio_fds.poll[all_sigio_fds.used++] =
                ((struct pollfd) { .fd          = fd,
-                                  .events      = read ? POLLIN : POLLOUT,
+                                  .events      = POLLIN,
                                   .revents     = 0 });
 out:
        sigio_unlock();
@@ -360,17 +379,16 @@ out:
 
 /* Changed during early boot */
 static int pty_output_sigio;
-static int pty_close_sigio;
 
-void maybe_sigio_broken(int fd, int read)
+void maybe_sigio_broken(int fd)
 {
        if (!isatty(fd))
                return;
 
-       if ((read || pty_output_sigio) && (!read || pty_close_sigio))
+       if (pty_output_sigio)
                return;
 
-       sigio_broken(fd, read);
+       sigio_broken(fd);
 }
 
 static void sigio_cleanup(void)
@@ -514,19 +532,6 @@ static void tty_output(int master, int slave)
                printk(UM_KERN_CONT "tty_output : read failed, err = %d\n", n);
 }
 
-static void tty_close(int master, int slave)
-{
-       printk(UM_KERN_INFO "Checking that host ptys support SIGIO on "
-              "close...");
-
-       close(slave);
-       if (got_sigio) {
-               printk(UM_KERN_CONT "Yes\n");
-               pty_close_sigio = 1;
-       } else
-               printk(UM_KERN_CONT "No, enabling workaround\n");
-}
-
 static void __init check_sigio(void)
 {
        if ((access("/dev/ptmx", R_OK) < 0) &&
@@ -536,7 +541,6 @@ static void __init check_sigio(void)
                return;
        }
        check_one_sigio(tty_output);
-       check_one_sigio(tty_close);
 }
 
 /* Here because it only does the SIGIO testing for now */
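
The __add_sigio_fd()/add_sigio_fd() and __ignore_sigio_fd()/ignore_sigio_fd() splits above are the usual locked-wrapper idiom: the double-underscore variant assumes the caller already holds sigio_lock(), so the suspend/resume path can make several calls under one acquisition while ordinary callers keep the old locking behaviour. In outline (a sketch of the idiom, not the literal code above):

	int __do_op(int fd)
	{
		/* caller must hold sigio_lock() */
		return 0;
	}

	int do_op(int fd)
	{
		int err;

		sigio_lock();
		err = __do_op(fd);
		sigio_unlock();

		return err;
	}
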
index b58bc68..96f511d 100644 (file)
@@ -136,6 +136,16 @@ void set_sigstack(void *sig_stack, int size)
                panic("enabling signal stack failed, errno = %d\n", errno);
 }
 
+static void sigusr1_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
+{
+       uml_pm_wake();
+}
+
+void register_pm_wake_signal(void)
+{
+       set_handler(SIGUSR1);
+}
+
 static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
        [SIGSEGV] = sig_handler,
        [SIGBUS] = sig_handler,
@@ -145,7 +155,9 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
 
        [SIGIO] = sig_handler,
        [SIGWINCH] = sig_handler,
-       [SIGALRM] = timer_alarm_handler
+       [SIGALRM] = timer_alarm_handler,
+
+       [SIGUSR1] = sigusr1_handler,
 };
 
 static void hard_handler(int sig, siginfo_t *si, void *p)
@@ -222,6 +234,11 @@ void set_handler(int sig)
                panic("sigprocmask failed - errno = %d\n", errno);
 }
 
+void send_sigio_to_self(void)
+{
+       kill(os_getpid(), SIGIO);
+}
+
 int change_sig(int signal, int on)
 {
        sigset_t sigset;
@@ -254,6 +271,9 @@ void unblock_signals(void)
                return;
 
        signals_enabled = 1;
+#ifdef UML_CONFIG_UML_TIME_TRAVEL_SUPPORT
+       deliver_time_travel_irqs();
+#endif
 
        /*
         * We loop because the IRQ handler returns with interrupts off.  So,
index 4fb877b..0621d52 100644 (file)
@@ -400,7 +400,20 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
                if (WIFSTOPPED(status)) {
                        int sig = WSTOPSIG(status);
 
-                       ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
+                       /* These signal handlers need the si argument.
+                        * The SIGIO and SIGALRM handlers, which constitute
+                        * the majority of invocations, do not use it.
+                        */
+                       switch (sig) {
+                       case SIGSEGV:
+                       case SIGTRAP:
+                       case SIGILL:
+                       case SIGBUS:
+                       case SIGFPE:
+                       case SIGWINCH:
+                               ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
+                               break;
+                       }
 
                        switch (sig) {
                        case SIGSEGV:
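
This hunk makes the PTRACE_GETSIGINFO call conditional: the frequent SIGIO/SIGALRM stops skip the extra ptrace() round trip, and siginfo is fetched only for the fault-style signals whose handlers consume it. A sketch of that shape (a ptrace-stopped child is assumed; error handling elided):

```c
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>

/* Fetch siginfo only for the signals whose handlers consume it. */
void handle_stop(pid_t pid, int status)
{
	siginfo_t si = { 0 };
	int sig = WSTOPSIG(status);

	switch (sig) {
	case SIGSEGV:
	case SIGTRAP:
	case SIGILL:
	case SIGBUS:
	case SIGFPE:
	case SIGWINCH:
		/* Fault-style stops: the address/code in si matter. */
		ptrace(PTRACE_GETSIGINFO, pid, 0, &si);
		break;
	default:
		/* SIGIO/SIGALRM etc.: skip the extra syscall. */
		break;
	}

	/* ... dispatch on sig, passing &si where required ... */
	(void)si;
}
```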
index 90f6de2..a61cbf7 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #include <stddef.h>
+#include <unistd.h>
 #include <errno.h>
 #include <signal.h>
 #include <time.h>
@@ -99,19 +100,9 @@ long long os_nsecs(void)
 }
 
 /**
- * os_idle_sleep() - sleep for a given time of nsecs
- * @nsecs: nanoseconds to sleep
+ * os_idle_sleep() - sleep until interrupted
  */
-void os_idle_sleep(unsigned long long nsecs)
+void os_idle_sleep(void)
 {
-       struct timespec ts = {
-               .tv_sec  = nsecs / UM_NSEC_PER_SEC,
-               .tv_nsec = nsecs % UM_NSEC_PER_SEC
-       };
-
-       /*
-        * Relay the signal if clock_nanosleep is interrupted.
-        */
-       if (clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL))
-               deliver_alarm();
+       pause();
 }
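
os_idle_sleep() no longer computes a timeout at all: it blocks in pause() until any signal, such as the timer's SIGALRM, is delivered. A standalone illustration of pause()-driven idling:

```c
#include <unistd.h>
#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t ticked;

static void on_alarm(int sig) { ticked = 1; }

int main(void)
{
	signal(SIGALRM, on_alarm);
	alarm(1);		/* arm a wakeup source */

	pause();		/* sleep until *any* signal is delivered */

	printf("woke up, ticked=%d\n", (int)ticked);
	return 0;
}
```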
index 1d7558d..a3dd615 100644 (file)
@@ -137,20 +137,13 @@ static inline int is_umdir_used(char *dir)
 {
        char pid[sizeof("nnnnnnnnn")], *end, *file;
        int dead, fd, p, n, err;
-       size_t filelen;
+       size_t filelen = strlen(dir) + sizeof("/pid") + 1;
 
-       err = asprintf(&file, "%s/pid", dir);
-       if (err < 0)
-               return 0;
-
-       filelen = strlen(file);
+       file = malloc(filelen);
+       if (!file)
+               return -ENOMEM;
 
-       n = snprintf(file, filelen, "%s/pid", dir);
-       if (n >= filelen) {
-               printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n");
-               err = -E2BIG;
-               goto out;
-       }
+       snprintf(file, filelen, "%s/pid", dir);
 
        dead = 0;
        fd = open(file, O_RDONLY);
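
The rewrite above sizes the buffer from its parts and fills it with a single snprintf(), replacing the broken asprintf()-then-snprintf() sequence, which reused the formatted string's length as the buffer size and so always truncated. Note sizeof("/pid") already counts the terminating NUL, so the extra + 1 in the hunk is slack. A sketch of the pattern (the caller frees the result):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Build "<dir>/pid" in a buffer sized from its parts. */
static char *pid_path(const char *dir)
{
	/* sizeof("/pid") includes the terminating NUL. */
	size_t len = strlen(dir) + sizeof("/pid");
	char *file = malloc(len);

	if (!file)
		return NULL;

	snprintf(file, len, "%s/pid", dir);
	return file;
}
```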
index d9a631c..901ea5e 100644 (file)
@@ -12,6 +12,7 @@
 #undef CONFIG_PARAVIRT_XXL
 #undef CONFIG_PARAVIRT_SPINLOCKS
 #undef CONFIG_KASAN
+#undef CONFIG_KASAN_GENERIC
 
 /* cpu_feature_enabled() cannot be used this early */
 #define USE_EARLY_PGTABLE_L5
index 0d0667a..874aeac 100644 (file)
 438    i386    pidfd_getfd             sys_pidfd_getfd
 439    i386    faccessat2              sys_faccessat2
 440    i386    process_madvise         sys_process_madvise
+441    i386    epoll_pwait2            sys_epoll_pwait2                compat_sys_epoll_pwait2
index 3798192..7867212 100644 (file)
 438    common  pidfd_getfd             sys_pidfd_getfd
 439    common  faccessat2              sys_faccessat2
 440    common  process_madvise         sys_process_madvise
+441    common  epoll_pwait2            sys_epoll_pwait2
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
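
Both syscall tables add epoll_pwait2 (number 441), which takes a struct timespec timeout rather than milliseconds. A hedged usage sketch via the raw syscall; it assumes kernel headers new enough to define __NR_epoll_pwait2, and passes a NULL sigmask (with sigsetsize 0) so no signal-mask swap is requested:

```c
#define _GNU_SOURCE
#include <sys/epoll.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <time.h>
#include <stdio.h>

int main(void)
{
	struct epoll_event ev = { .events = EPOLLIN }, out[8];
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 250 * 1000 * 1000 };
	int epfd = epoll_create1(0);
	long n;

	ev.data.fd = 0;
	epoll_ctl(epfd, EPOLL_CTL_ADD, 0, &ev);	/* watch stdin */

	/* Nanosecond-granularity wait, no sigmask. */
	n = syscall(__NR_epoll_pwait2, epfd, out, 8, &ts, NULL, 0);
	printf("ready fds: %ld\n", n);

	close(epfd);
	return 0;
}
```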
index f5ef2d5..84b8878 100644 (file)
 #define X86_FEATURE_VMCALL             ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL                ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 #define X86_FEATURE_SEV_ES             ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_VM_PAGE_FLUSH      ( 8*32+21) /* "" VM Page Flush MSR is supported */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
 #define X86_FEATURE_TSXLDTRK           (18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG            (18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR           (18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_AVX512_FP16                (18*32+23) /* AVX512 FP16 */
 #define X86_FEATURE_SPEC_CTRL          (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP                (18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_FLUSH_L1D          (18*32+28) /* Flush L1D cache */
index 7e5f33a..3ab7b46 100644 (file)
@@ -614,6 +614,7 @@ struct kvm_vcpu_arch {
 
        struct kvm_pio_request pio;
        void *pio_data;
+       void *guest_ins_data;
 
        u8 event_exit_inst_len;
 
@@ -805,6 +806,9 @@ struct kvm_vcpu_arch {
                 */
                bool enforce;
        } pv_cpuid;
+
+       /* Protected Guests */
+       bool guest_state_protected;
 };
 
 struct kvm_lpage_info {
@@ -1088,7 +1092,7 @@ struct kvm_x86_ops {
        void (*hardware_disable)(void);
        void (*hardware_unsetup)(void);
        bool (*cpu_has_accelerated_tpr)(void);
-       bool (*has_emulated_msr)(u32 index);
+       bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
        void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
 
        unsigned int vm_size;
@@ -1115,7 +1119,8 @@ struct kvm_x86_ops {
                            struct kvm_segment *var, int seg);
        void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
        void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
-       int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+       bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+       void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
        int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
        void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
        void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
@@ -1231,6 +1236,7 @@ struct kvm_x86_ops {
        void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
                                           struct kvm_memory_slot *slot,
                                           gfn_t offset, unsigned long mask);
+       int (*cpu_dirty_log_size)(void);
 
        /* pmu operations of sub-arch */
        const struct kvm_pmu_ops *pmu_ops;
@@ -1280,6 +1286,7 @@ struct kvm_x86_ops {
 
        void (*migrate_timers)(struct kvm_vcpu *vcpu);
        void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+       int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 };
 
 struct kvm_x86_nested_ops {
@@ -1470,6 +1477,10 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
                    int reason, bool has_error_code, u32 error_code);
 
+void kvm_free_guest_fpu(struct kvm_vcpu *vcpu);
+
+void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
+void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4);
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
@@ -1696,7 +1707,8 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
 
 int kvm_is_in_guest(void);
 
-int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
+void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
+                                    u32 size);
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
@@ -1743,4 +1755,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 #define GET_SMSTATE(type, buf, offset)         \
        (*(type *)((buf) + (offset) - 0x7e00))
 
+int kvm_cpu_dirty_log_size(void);
+
 #endif /* _ASM_X86_KVM_HOST_H */
index 2b5fc9a..546d6ec 100644 (file)
 #define MSR_AMD64_ICIBSEXTDCTL         0xc001103c
 #define MSR_AMD64_IBSOPDATA4           0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX    8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_VM_PAGE_FLUSH                0xc001011e
 #define MSR_AMD64_SEV_ES_GHCB          0xc0010130
 #define MSR_AMD64_SEV                  0xc0010131
 #define MSR_AMD64_SEV_ENABLED_BIT      0
index 71d630b..1c56194 100644 (file)
@@ -98,6 +98,16 @@ enum {
        INTERCEPT_MWAIT_COND,
        INTERCEPT_XSETBV,
        INTERCEPT_RDPRU,
+       TRAP_EFER_WRITE,
+       TRAP_CR0_WRITE,
+       TRAP_CR1_WRITE,
+       TRAP_CR2_WRITE,
+       TRAP_CR3_WRITE,
+       TRAP_CR4_WRITE,
+       TRAP_CR5_WRITE,
+       TRAP_CR6_WRITE,
+       TRAP_CR7_WRITE,
+       TRAP_CR8_WRITE,
        /* Byte offset 014h (word 5) */
        INTERCEPT_INVLPGB = 160,
        INTERCEPT_INVLPGB_ILLEGAL,
@@ -130,7 +140,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
        u32 exit_int_info_err;
        u64 nested_ctl;
        u64 avic_vapic_bar;
-       u8 reserved_4[8];
+       u64 ghcb_gpa;
        u32 event_inj;
        u32 event_inj_err;
        u64 nested_cr3;
@@ -144,6 +154,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
        u8 reserved_6[8];       /* Offset 0xe8 */
        u64 avic_logical_id;    /* Offset 0xf0 */
        u64 avic_physical_id;   /* Offset 0xf8 */
+       u8 reserved_7[8];
+       u64 vmsa_pa;            /* Used for an SEV-ES guest */
 };
 
 
@@ -178,7 +190,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define LBR_CTL_ENABLE_MASK BIT_ULL(0)
 #define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
 
-#define SVM_INTERRUPT_SHADOW_MASK 1
+#define SVM_INTERRUPT_SHADOW_MASK      BIT_ULL(0)
+#define SVM_GUEST_INTERRUPT_MASK       BIT_ULL(1)
 
 #define SVM_IOIO_STR_SHIFT 2
 #define SVM_IOIO_REP_SHIFT 3
@@ -197,6 +210,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 
 #define SVM_NESTED_CTL_NP_ENABLE       BIT(0)
 #define SVM_NESTED_CTL_SEV_ENABLE      BIT(1)
+#define SVM_NESTED_CTL_SEV_ES_ENABLE   BIT(2)
 
 struct vmcb_seg {
        u16 selector;
@@ -220,7 +234,8 @@ struct vmcb_save_area {
        u8 cpl;
        u8 reserved_2[4];
        u64 efer;
-       u8 reserved_3[112];
+       u8 reserved_3[104];
+       u64 xss;                /* Valid for SEV-ES only */
        u64 cr4;
        u64 cr3;
        u64 cr0;
@@ -251,9 +266,12 @@ struct vmcb_save_area {
 
        /*
         * The following part of the save area is valid only for
-        * SEV-ES guests when referenced through the GHCB.
+        * SEV-ES guests when referenced through the GHCB or for
+        * saving to the host save area.
         */
-       u8 reserved_7[104];
+       u8 reserved_7[80];
+       u32 pkru;
+       u8 reserved_7a[20];
        u64 reserved_8;         /* rax already available at 0x01f8 */
        u64 rcx;
        u64 rdx;
@@ -294,7 +312,7 @@ struct ghcb {
 
 
 #define EXPECTED_VMCB_SAVE_AREA_SIZE           1032
-#define EXPECTED_VMCB_CONTROL_AREA_SIZE                256
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE                272
 #define EXPECTED_GHCB_SIZE                     PAGE_SIZE
 
 static inline void __unused_size_checks(void)
@@ -379,6 +397,16 @@ struct vmcb {
                                (unsigned long *)&ghcb->save.valid_bitmap);     \
        }                                                                       \
                                                                                \
+       static inline u64 ghcb_get_##field(struct ghcb *ghcb)                   \
+       {                                                                       \
+               return ghcb->save.field;                                        \
+       }                                                                       \
+                                                                               \
+       static inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb)        \
+       {                                                                       \
+               return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0;    \
+       }                                                                       \
+                                                                               \
        static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value)       \
        {                                                                       \
                __set_bit(GHCB_BITMAP_IDX(field),                               \
index f8ba528..38ca445 100644 (file)
 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK    0x0000001f
 #define VMX_MISC_SAVE_EFER_LMA                 0x00000020
 #define VMX_MISC_ACTIVITY_HLT                  0x00000040
+#define VMX_MISC_ACTIVITY_WAIT_SIPI            0x00000100
 #define VMX_MISC_ZERO_LEN_INS                  0x40000000
 #define VMX_MISC_MSR_LIST_MULTIPLIER           512
 
index 5941e18..1a162e5 100644 (file)
@@ -355,7 +355,7 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr);
 void make_lowmem_page_readonly(void *vaddr);
 void make_lowmem_page_readwrite(void *vaddr);
 
-#define xen_remap(cookie, size) ioremap((cookie), (size));
+#define xen_remap(cookie, size) ioremap((cookie), (size))
 #define xen_unmap(cookie) iounmap((cookie))
 
 static inline bool xen_arch_need_swiotlb(struct device *dev,
index 89e5f3d..8e76d37 100644 (file)
@@ -12,6 +12,7 @@
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
 #define DE_VECTOR 0
 #define DB_VECTOR 1
index f1d8307..554f75f 100644 (file)
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
 #define SVM_EXIT_RDPRU         0x08e
+#define SVM_EXIT_EFER_WRITE_TRAP               0x08f
+#define SVM_EXIT_CR0_WRITE_TRAP                        0x090
+#define SVM_EXIT_CR1_WRITE_TRAP                        0x091
+#define SVM_EXIT_CR2_WRITE_TRAP                        0x092
+#define SVM_EXIT_CR3_WRITE_TRAP                        0x093
+#define SVM_EXIT_CR4_WRITE_TRAP                        0x094
+#define SVM_EXIT_CR5_WRITE_TRAP                        0x095
+#define SVM_EXIT_CR6_WRITE_TRAP                        0x096
+#define SVM_EXIT_CR7_WRITE_TRAP                        0x097
+#define SVM_EXIT_CR8_WRITE_TRAP                        0x098
+#define SVM_EXIT_CR9_WRITE_TRAP                        0x099
+#define SVM_EXIT_CR10_WRITE_TRAP               0x09a
+#define SVM_EXIT_CR11_WRITE_TRAP               0x09b
+#define SVM_EXIT_CR12_WRITE_TRAP               0x09c
+#define SVM_EXIT_CR13_WRITE_TRAP               0x09d
+#define SVM_EXIT_CR14_WRITE_TRAP               0x09e
+#define SVM_EXIT_CR15_WRITE_TRAP               0x09f
 #define SVM_EXIT_INVPCID       0x0a2
 #define SVM_EXIT_NPF           0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI           0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS     0x402
+#define SVM_EXIT_VMGEXIT       0x403
 
 /* SEV-ES software-defined VMGEXIT events */
 #define SVM_VMGEXIT_MMIO_READ                  0x80000001
        { SVM_EXIT_MONITOR,     "monitor" }, \
        { SVM_EXIT_MWAIT,       "mwait" }, \
        { SVM_EXIT_XSETBV,      "xsetbv" }, \
+       { SVM_EXIT_EFER_WRITE_TRAP,     "write_efer_trap" }, \
+       { SVM_EXIT_CR0_WRITE_TRAP,      "write_cr0_trap" }, \
+       { SVM_EXIT_CR4_WRITE_TRAP,      "write_cr4_trap" }, \
+       { SVM_EXIT_CR8_WRITE_TRAP,      "write_cr8_trap" }, \
        { SVM_EXIT_INVPCID,     "invpcid" }, \
        { SVM_EXIT_NPF,         "npf" }, \
        { SVM_EXIT_AVIC_INCOMPLETE_IPI,         "avic_incomplete_ipi" }, \
        { SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }, \
+       { SVM_EXIT_VMGEXIT,             "vmgexit" }, \
+       { SVM_VMGEXIT_MMIO_READ,        "vmgexit_mmio_read" }, \
+       { SVM_VMGEXIT_MMIO_WRITE,       "vmgexit_mmio_write" }, \
+       { SVM_VMGEXIT_NMI_COMPLETE,     "vmgexit_nmi_complete" }, \
+       { SVM_VMGEXIT_AP_HLT_LOOP,      "vmgexit_ap_hlt_loop" }, \
+       { SVM_VMGEXIT_AP_JUMP_TABLE,    "vmgexit_ap_jump_table" }, \
        { SVM_EXIT_ERR,         "invalid_guest_state" }
 
 
index b8ff9e8..ada955c 100644 (file)
@@ -32,6 +32,7 @@
 #define EXIT_REASON_EXTERNAL_INTERRUPT  1
 #define EXIT_REASON_TRIPLE_FAULT        2
 #define EXIT_REASON_INIT_SIGNAL                        3
+#define EXIT_REASON_SIPI_SIGNAL         4
 
 #define EXIT_REASON_INTERRUPT_WINDOW    7
 #define EXIT_REASON_NMI_WINDOW          8
@@ -94,6 +95,7 @@
        { EXIT_REASON_EXTERNAL_INTERRUPT,    "EXTERNAL_INTERRUPT" }, \
        { EXIT_REASON_TRIPLE_FAULT,          "TRIPLE_FAULT" }, \
        { EXIT_REASON_INIT_SIGNAL,           "INIT_SIGNAL" }, \
+       { EXIT_REASON_SIPI_SIGNAL,           "SIPI_SIGNAL" }, \
        { EXIT_REASON_INTERRUPT_WINDOW,      "INTERRUPT_WINDOW" }, \
        { EXIT_REASON_NMI_WINDOW,            "NMI_WINDOW" }, \
        { EXIT_REASON_TASK_SWITCH,           "TASK_SWITCH" }, \
index c8daa92..5d3a0b8 100644 (file)
@@ -112,7 +112,7 @@ SYM_FUNC_START(do_suspend_lowlevel)
        movq    pt_regs_r14(%rax), %r14
        movq    pt_regs_r15(%rax), %r15
 
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK
        /*
         * The suspend path may have poisoned some areas deeper in the stack,
         * which we now need to unpoison.
index d502241..42af31b 100644 (file)
@@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = {
        { X86_FEATURE_CQM_MBM_TOTAL,            X86_FEATURE_CQM_LLC   },
        { X86_FEATURE_CQM_MBM_LOCAL,            X86_FEATURE_CQM_LLC   },
        { X86_FEATURE_AVX512_BF16,              X86_FEATURE_AVX512VL  },
+       { X86_FEATURE_AVX512_FP16,              X86_FEATURE_AVX512BW  },
        { X86_FEATURE_ENQCMD,                   X86_FEATURE_XSAVES    },
        { X86_FEATURE_PER_THREAD_MBA,           X86_FEATURE_MBA       },
        {}
index 866c9a9..2369249 100644 (file)
@@ -44,6 +44,7 @@ static const struct cpuid_bit cpuid_bits[] = {
        { X86_FEATURE_SEV,              CPUID_EAX,  1, 0x8000001f, 0 },
        { X86_FEATURE_SEV_ES,           CPUID_EAX,  3, 0x8000001f, 0 },
        { X86_FEATURE_SME_COHERENT,     CPUID_EAX, 10, 0x8000001f, 0 },
+       { X86_FEATURE_VM_PAGE_FLUSH,    CPUID_EAX,  2, 0x8000001f, 0 },
        { 0, 0, 0, 0, 0 }
 };
 
index 924571f..c6ede3b 100644 (file)
@@ -501,12 +501,12 @@ static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
              ghcb_rbp_is_valid(ghcb)))
                return false;
 
-       regs->bx = ghcb->save.rbx;
-       regs->cx = ghcb->save.rcx;
-       regs->dx = ghcb->save.rdx;
-       regs->si = ghcb->save.rsi;
-       regs->di = ghcb->save.rdi;
-       regs->bp = ghcb->save.rbp;
+       regs->bx = ghcb_get_rbx(ghcb);
+       regs->cx = ghcb_get_rcx(ghcb);
+       regs->dx = ghcb_get_rdx(ghcb);
+       regs->si = ghcb_get_rsi(ghcb);
+       regs->di = ghcb_get_rdi(ghcb);
+       regs->bp = ghcb_get_rbp(ghcb);
 
        return true;
 }
index 34b18f6..aa59374 100644 (file)
@@ -44,7 +44,6 @@ static int __init parse_no_kvmclock_vsyscall(char *arg)
 early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
 
 /* Aligned to page sizes to match whats mapped via vsyscalls to userspace */
-#define HV_CLOCK_SIZE  (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS)
 #define HVC_BOOT_ARRAY_SIZE \
        (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
 
index fb55981..7f5aec7 100644 (file)
@@ -303,11 +303,12 @@ DEFINE_IDTENTRY_ERRORCODE(exc_alignment_check)
        local_irq_enable();
 
        if (handle_user_split_lock(regs, error_code))
-               return;
+               goto out;
 
        do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
                error_code, BUS_ADRALN, NULL);
 
+out:
        local_irq_disable();
 }
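
The split-lock hunk turns an early return into goto out so that local_irq_disable() runs on every path and the handler leaves interrupts as it found them. The control-flow shape, sketched with stand-in stubs for the irq helpers:

```c
#include <stdio.h>

static void irq_enable(void)  { puts("irqs on");  }
static void irq_disable(void) { puts("irqs off"); }
static int  fast_path_handles(int err) { return err == 0; }
static void deliver_trap(int err) { printf("trap, err=%d\n", err); }

/* Every exit path must restore the entry state (irqs disabled). */
static void handle_check(int error_code)
{
	irq_enable();

	if (fast_path_handles(error_code))
		goto out;	/* an early "return" here would leak irqs on */

	deliver_trap(error_code);

out:
	irq_disable();
}

int main(void)
{
	handle_check(0);
	handle_check(1);
	return 0;
}
```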
 
index f92dfd8..7ac5926 100644 (file)
@@ -100,7 +100,8 @@ config KVM_AMD_SEV
        depends on KVM_AMD && X86_64
        depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
        help
-       Provides support for launching Encrypted VMs on AMD processors.
+         Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
+         with Encrypted State (SEV-ES) on AMD processors.
 
 config KVM_MMU_AUDIT
        bool "Audit KVM MMU"
index b804444..4bd14ab 100644 (file)
@@ -10,7 +10,8 @@ endif
 KVM := ../../../virt/kvm
 
 kvm-y                  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
-                               $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
+                               $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \
+                               $(KVM)/dirty_ring.o
 kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
 
 kvm-y                  += x86.o emulate.o i8259.o irq.o lapic.o \
index 83637a2..13036cf 100644 (file)
@@ -146,6 +146,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
                                           MSR_IA32_MISC_ENABLE_MWAIT);
        }
 }
+EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
 
 static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
@@ -418,7 +419,7 @@ void kvm_set_cpu_caps(void)
                F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
                F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
                F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
-               F(SERIALIZE) | F(TSXLDTRK)
+               F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16)
        );
 
        /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
index f7a6e8f..dc921d7 100644 (file)
@@ -264,6 +264,20 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
        return x86_stepping(best->eax);
 }
 
+static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
+{
+       return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
+               guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) ||
+               guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) ||
+               guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD));
+}
+
+static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
+{
+       return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
+               guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB));
+}
+
 static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
index 5c7c406..922c69d 100644 (file)
@@ -1951,8 +1951,8 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
        return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
 }
 
-int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
-                               struct kvm_cpuid_entry2 __user *entries)
+int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
+                    struct kvm_cpuid_entry2 __user *entries)
 {
        uint16_t evmcs_ver = 0;
        struct kvm_cpuid_entry2 cpuid_entries[] = {
@@ -2037,7 +2037,7 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
                         * Direct Synthetic timers only make sense with in-kernel
                         * LAPIC
                         */
-                       if (lapic_in_kernel(vcpu))
+                       if (!vcpu || lapic_in_kernel(vcpu))
                                ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
 
                        break;
index e68c6c2..6d7def2 100644 (file)
@@ -126,7 +126,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 void kvm_hv_init_vm(struct kvm *kvm);
 void kvm_hv_destroy_vm(struct kvm *kvm);
 int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
-int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
-                               struct kvm_cpuid_entry2 __user *entries);
+int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
+                    struct kvm_cpuid_entry2 __user *entries);
 
 #endif
index a889563..f15bc16 100644 (file)
@@ -9,6 +9,31 @@
        (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
         | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
 
+static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
+                                            enum kvm_reg reg)
+{
+       return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
+                                        enum kvm_reg reg)
+{
+       return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
+static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
+                                              enum kvm_reg reg)
+{
+       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
+                                          enum kvm_reg reg)
+{
+       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
 #define BUILD_KVM_GPR_ACCESSORS(lname, uname)                                \
 static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
 {                                                                            \
@@ -18,6 +43,7 @@ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu,              \
                                                unsigned long val)            \
 {                                                                            \
        vcpu->arch.regs[VCPU_REGS_##uname] = val;                             \
+       kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname);                     \
 }
 BUILD_KVM_GPR_ACCESSORS(rax, RAX)
 BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
@@ -37,31 +63,6 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
 BUILD_KVM_GPR_ACCESSORS(r15, R15)
 #endif
 
-static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
-                                            enum kvm_reg reg)
-{
-       return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
-                                        enum kvm_reg reg)
-{
-       return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
-static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
-                                              enum kvm_reg reg)
-{
-       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
-                                          enum kvm_reg reg)
-{
-       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-       __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
 {
        if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
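
Hoisting kvm_register_mark_dirty() above BUILD_KVM_GPR_ACCESSORS lets every generated kvm_<reg>_write() flag its register dirty at write time. A toy version of the avail/dirty two-bitmap register cache (names invented):

```c
#include <stdint.h>
#include <stdio.h>

enum reg { REG_RAX, REG_RBX, NR_REGS };

struct vcpu {
	uint64_t regs[NR_REGS];
	uint32_t regs_avail;	/* cached value is current */
	uint32_t regs_dirty;	/* cache is newer than backing state */
};

static void mark_dirty(struct vcpu *v, enum reg r)
{
	v->regs_avail |= 1u << r;	/* dirty implies available */
	v->regs_dirty |= 1u << r;
}

static void reg_write(struct vcpu *v, enum reg r, uint64_t val)
{
	v->regs[r] = val;
	mark_dirty(v, r);		/* writer flags the flush-back */
}

int main(void)
{
	struct vcpu v = { 0 };

	reg_write(&v, REG_RAX, 42);
	printf("dirty mask: %#x\n", v.regs_dirty);
	return 0;
}
```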
index 86c33d5..3136e05 100644 (file)
@@ -2843,14 +2843,35 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
        u8 sipi_vector;
+       int r;
        unsigned long pe;
 
-       if (!lapic_in_kernel(vcpu) || !apic->pending_events)
+       if (!lapic_in_kernel(vcpu))
                return;
 
        /*
+        * Read pending events before calling the check_events
+        * callback.
+        */
+       pe = smp_load_acquire(&apic->pending_events);
+       if (!pe)
+               return;
+
+       if (is_guest_mode(vcpu)) {
+               r = kvm_x86_ops.nested_ops->check_events(vcpu);
+               if (r < 0)
+                       return;
+               /*
+                * If an event has happened and caused a vmexit,
+                * we know INITs are latched and therefore
+                * we will not incorrectly deliver an APIC
+                * event instead of a vmexit.
+                */
+       }
+
+       /*
         * INITs are latched while CPU is in specific states
-        * (SMM, VMX non-root mode, SVM with GIF=0).
+        * (SMM, VMX root mode, SVM with GIF=0).
         * Because a CPU cannot be in these states immediately
         * after it has processed an INIT signal (and thus in
         * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
@@ -2858,26 +2879,28 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
         */
        if (kvm_vcpu_latch_init(vcpu)) {
                WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
-               if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
+               if (test_bit(KVM_APIC_SIPI, &pe))
                        clear_bit(KVM_APIC_SIPI, &apic->pending_events);
                return;
        }
 
-       pe = xchg(&apic->pending_events, 0);
        if (test_bit(KVM_APIC_INIT, &pe)) {
+               clear_bit(KVM_APIC_INIT, &apic->pending_events);
                kvm_vcpu_reset(vcpu, true);
                if (kvm_vcpu_is_bsp(apic->vcpu))
                        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
                else
                        vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
        }
-       if (test_bit(KVM_APIC_SIPI, &pe) &&
-           vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
-               /* evaluate pending_events before reading the vector */
-               smp_rmb();
-               sipi_vector = apic->sipi_vector;
-               kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
-               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+       if (test_bit(KVM_APIC_SIPI, &pe)) {
+               clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+               if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+                       /* evaluate pending_events before reading the vector */
+                       smp_rmb();
+                       sipi_vector = apic->sipi_vector;
+                       kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
+                       vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+               }
        }
 }
 
index 7a6ae9e..c478904 100644 (file)
@@ -820,7 +820,7 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
        slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
        if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
                return NULL;
-       if (no_dirty_log && slot->dirty_bitmap)
+       if (no_dirty_log && kvm_slot_dirty_track_enabled(slot))
                return NULL;
 
        return slot;
@@ -1289,6 +1289,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
+int kvm_cpu_dirty_log_size(void)
+{
+       if (kvm_x86_ops.cpu_dirty_log_size)
+               return kvm_x86_ops.cpu_dirty_log_size();
+
+       return 0;
+}
+
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
                                    struct kvm_memory_slot *slot, u64 gfn)
 {
index 213699b..e798489 100644 (file)
@@ -381,6 +381,35 @@ TRACE_EVENT(
        )
 );
 
+TRACE_EVENT(
+       kvm_tdp_mmu_spte_changed,
+       TP_PROTO(int as_id, gfn_t gfn, int level, u64 old_spte, u64 new_spte),
+       TP_ARGS(as_id, gfn, level, old_spte, new_spte),
+
+       TP_STRUCT__entry(
+               __field(u64, gfn)
+               __field(u64, old_spte)
+               __field(u64, new_spte)
+               /* Level cannot be larger than 5 on x86, so it fits in a u8. */
+               __field(u8, level)
+               /* as_id can only be 0 or 1 on x86, so it fits in a u8. */
+               __field(u8, as_id)
+       ),
+
+       TP_fast_assign(
+               __entry->gfn = gfn;
+               __entry->old_spte = old_spte;
+               __entry->new_spte = new_spte;
+               __entry->level = level;
+               __entry->as_id = as_id;
+       ),
+
+       TP_printk("as id %d gfn %llx level %d old_spte %llx new_spte %llx",
+                 __entry->as_id, __entry->gfn, __entry->level,
+                 __entry->old_spte, __entry->new_spte
+       )
+);
+
 #endif /* _TRACE_KVMMMU_H */
 
 #undef TRACE_INCLUDE_PATH
index 84c8f06..4bd2f1d 100644 (file)
@@ -7,6 +7,8 @@
 #include "tdp_mmu.h"
 #include "spte.h"
 
+#include <trace/events/kvm.h>
+
 #ifdef CONFIG_X86_64
 static bool __read_mostly tdp_mmu_enabled = false;
 module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
@@ -108,6 +110,8 @@ static struct kvm_mmu_page *alloc_tdp_mmu_page(struct kvm_vcpu *vcpu, gfn_t gfn,
        sp->gfn = gfn;
        sp->tdp_mmu_page = true;
 
+       trace_kvm_mmu_get_page(sp, true);
+
        return sp;
 }
 
@@ -185,7 +189,7 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
        if ((!is_writable_pte(old_spte) || pfn_changed) &&
            is_writable_pte(new_spte)) {
                slot = __gfn_to_memslot(__kvm_memslots(kvm, as_id), gfn);
-               mark_page_dirty_in_slot(slot, gfn);
+               mark_page_dirty_in_slot(kvm, slot, gfn);
        }
 }
 
@@ -244,6 +248,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
        if (old_spte == new_spte)
                return;
 
+       trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
+
        /*
         * The only times a SPTE should be changed from a non-present to
         * non-present state is when an MMIO entry is installed/modified/
@@ -278,6 +284,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
                pt = spte_to_child_pt(old_spte, level);
                sp = sptep_to_sp(pt);
 
+               trace_kvm_mmu_prepare_zap_page(sp);
+
                list_del(&sp->link);
 
                if (sp->lpage_disallowed)
@@ -480,11 +488,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
        if (unlikely(is_noslot_pfn(pfn))) {
                new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
                trace_mark_mmio_spte(iter->sptep, iter->gfn, new_spte);
-       } else
+       } else {
                make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn,
                                         pfn, iter->old_spte, prefault, true,
                                         map_writable, !shadow_accessed_mask,
                                         &new_spte);
+               trace_kvm_mmu_set_spte(iter->level, iter->gfn, iter->sptep);
+       }
 
        if (new_spte == iter->old_spte)
                ret = RET_PF_SPURIOUS;
@@ -698,6 +708,8 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
 
                tdp_mmu_set_spte_no_acc_track(kvm, &iter, new_spte);
                young = 1;
+
+               trace_kvm_age_page(iter.gfn, iter.level, slot, young);
        }
 
        return young;
index 7f0059a..f472fdb 100644 (file)
@@ -84,12 +84,8 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
        } else
                /* MTRR mask */
                mask |= 0x7ff;
-       if (data & mask) {
-               kvm_inject_gp(vcpu, 0);
-               return false;
-       }
 
-       return true;
+       return (data & mask) == 0;
 }
 EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
 
index 8c55099..0ef84d5 100644 (file)
@@ -233,7 +233,8 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
  */
 static int avic_update_access_page(struct kvm *kvm, bool activate)
 {
-       int ret = 0;
+       void __user *ret;
+       int r = 0;
 
        mutex_lock(&kvm->slots_lock);
        /*
@@ -249,13 +250,15 @@ static int avic_update_access_page(struct kvm *kvm, bool activate)
                                      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
                                      APIC_DEFAULT_PHYS_BASE,
                                      activate ? PAGE_SIZE : 0);
-       if (ret)
+       if (IS_ERR(ret)) {
+               r = PTR_ERR(ret);
                goto out;
+       }
 
        kvm->arch.apic_access_page_done = activate;
 out:
        mutex_unlock(&kvm->slots_lock);
-       return ret;
+       return r;
 }
 
 static int avic_init_backing_page(struct kvm_vcpu *vcpu)
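
With __x86_set_memory_region() now returning a void __user * that encodes failures in the pointer value, callers switch from if (ret) to IS_ERR()/PTR_ERR(), as this hunk does. A userspace approximation of that encoding; the real macros live in include/linux/err.h, these are stand-ins:

```c
#include <stdio.h>
#include <errno.h>
#include <stdint.h>

/* Stand-ins for the kernel's ERR_PTR/IS_ERR/PTR_ERR. */
#define MAX_ERRNO 4095

static inline void *err_ptr(long err) { return (void *)err; }
static inline int is_err(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}
static inline long ptr_err(const void *p) { return (long)p; }

static void *map_region(int fail)
{
	static char region[4096];

	if (fail)
		return err_ptr(-ENOMEM);	/* error rides in the pointer */
	return region;
}

int main(void)
{
	void *p = map_region(1);

	if (is_err(p))
		printf("failed: %ld\n", ptr_err(p));
	return 0;
}
```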
index 9e4c226..b0b6674 100644 (file)
@@ -254,7 +254,7 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
                    (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK))
                        return false;
        }
-       if (kvm_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
+       if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
                return false;
 
        return nested_vmcb_check_controls(&vmcb12->control);
@@ -381,7 +381,7 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
        svm->vmcb->save.ds = vmcb12->save.ds;
        svm->vmcb->save.gdtr = vmcb12->save.gdtr;
        svm->vmcb->save.idtr = vmcb12->save.idtr;
-       kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags);
+       kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
        svm_set_efer(&svm->vcpu, vmcb12->save.efer);
        svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
        svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
@@ -394,8 +394,8 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
        svm->vmcb->save.rax = vmcb12->save.rax;
        svm->vmcb->save.rsp = vmcb12->save.rsp;
        svm->vmcb->save.rip = vmcb12->save.rip;
-       svm->vmcb->save.dr7 = vmcb12->save.dr7;
-       svm->vcpu.arch.dr6  = vmcb12->save.dr6;
+       svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
+       svm->vcpu.arch.dr6  = vmcb12->save.dr6 | DR6_FIXED_1 | DR6_RTM;
        svm->vmcb->save.cpl = vmcb12->save.cpl;
 }
 
@@ -660,13 +660,14 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        svm->vmcb->save.gdtr = hsave->save.gdtr;
        svm->vmcb->save.idtr = hsave->save.idtr;
-       kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
+       kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED);
        svm_set_efer(&svm->vcpu, hsave->save.efer);
        svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
        svm_set_cr4(&svm->vcpu, hsave->save.cr4);
        kvm_rax_write(&svm->vcpu, hsave->save.rax);
        kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
        kvm_rip_write(&svm->vcpu, hsave->save.rip);
-       svm->vmcb->save.dr7 = 0;
+       svm->vmcb->save.dr7 = DR7_FIXED_1;
        svm->vmcb->save.cpl = 0;
        svm->vmcb->control.exit_int_info = 0;
 
index 566f4d1..9858d5a 100644 (file)
 #include <linux/psp-sev.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
+#include <linux/processor.h>
+#include <linux/trace_events.h>
+#include <asm/fpu/internal.h>
+
+#include <asm/trapnr.h>
 
 #include "x86.h"
 #include "svm.h"
+#include "cpuid.h"
+#include "trace.h"
+
+#define __ex(x) __kvm_handle_fault_on_reboot(x)
 
+static u8 sev_enc_bit;
 static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
 static DEFINE_MUTEX(sev_bitmap_lock);
@@ -25,7 +35,6 @@ unsigned int max_sev_asid;
 static unsigned int min_sev_asid;
 static unsigned long *sev_asid_bitmap;
 static unsigned long *sev_reclaim_asid_bitmap;
-#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
 
 struct enc_region {
        struct list_head list;
@@ -57,19 +66,19 @@ static int sev_flush_asids(void)
 }
 
 /* Must be called with the sev_bitmap_lock held */
-static bool __sev_recycle_asids(void)
+static bool __sev_recycle_asids(int min_asid, int max_asid)
 {
        int pos;
 
        /* Check if there are any ASIDs to reclaim before performing a flush */
-       pos = find_next_bit(sev_reclaim_asid_bitmap,
-                           max_sev_asid, min_sev_asid - 1);
-       if (pos >= max_sev_asid)
+       pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid);
+       if (pos >= max_asid)
                return false;
 
        if (sev_flush_asids())
                return false;
 
+       /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
        bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
                   max_sev_asid);
        bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
@@ -77,20 +86,23 @@ static bool __sev_recycle_asids(void)
        return true;
 }
 
-static int sev_asid_new(void)
+static int sev_asid_new(struct kvm_sev_info *sev)
 {
+       int pos, min_asid, max_asid;
        bool retry = true;
-       int pos;
 
        mutex_lock(&sev_bitmap_lock);
 
        /*
-        * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid.
+        * SEV-enabled guests must use ASIDs from min_sev_asid to max_sev_asid.
+        * SEV-ES-enabled guests can use ASIDs from 1 to min_sev_asid - 1.
         */
+       min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+       max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
 again:
-       pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
-       if (pos >= max_sev_asid) {
-               if (retry && __sev_recycle_asids()) {
+       pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
+       if (pos >= max_asid) {
+               if (retry && __sev_recycle_asids(min_asid, max_asid)) {
                        retry = false;
                        goto again;
                }
@@ -172,7 +184,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (unlikely(sev->active))
                return ret;
 
-       asid = sev_asid_new();
+       asid = sev_asid_new(sev);
        if (asid < 0)
                return ret;
 
@@ -191,6 +203,16 @@ e_free:
        return ret;
 }
 
+static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+       if (!sev_es)
+               return -ENOTTY;
+
+       to_kvm_svm(kvm)->sev_info.es_active = true;
+
+       return sev_guest_init(kvm, argp);
+}
+
 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
 {
        struct sev_data_activate *data;
@@ -490,6 +512,96 @@ e_free:
        return ret;
 }
 
+static int sev_es_sync_vmsa(struct vcpu_svm *svm)
+{
+       struct vmcb_save_area *save = &svm->vmcb->save;
+
+       /* Check some debug related fields before encrypting the VMSA */
+       if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
+               return -EINVAL;
+
+       /* Sync registers */
+       save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
+       save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
+       save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+       save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
+       save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
+       save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
+       save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
+       save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
+#ifdef CONFIG_X86_64
+       save->r8  = svm->vcpu.arch.regs[VCPU_REGS_R8];
+       save->r9  = svm->vcpu.arch.regs[VCPU_REGS_R9];
+       save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
+       save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
+       save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
+       save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
+       save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
+       save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
+#endif
+       save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
+
+       /* Sync some non-GPR registers before encrypting */
+       save->xcr0 = svm->vcpu.arch.xcr0;
+       save->pkru = svm->vcpu.arch.pkru;
+       save->xss  = svm->vcpu.arch.ia32_xss;
+
+       /*
+        * SEV-ES will use a VMSA that is pointed to by the VMCB, not
+        * the traditional VMSA that is part of the VMCB. Copy the
+        * traditional VMSA as it has been built so far (in prep
+        * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
+        */
+       memcpy(svm->vmsa, save, sizeof(*save));
+
+       return 0;
+}
+
+static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+       struct sev_data_launch_update_vmsa *vmsa;
+       int i, ret;
+
+       if (!sev_es_guest(kvm))
+               return -ENOTTY;
+
+       vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
+       if (!vmsa)
+               return -ENOMEM;
+
+       for (i = 0; i < kvm->created_vcpus; i++) {
+               struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);
+
+               /* Perform some pre-encryption checks against the VMSA */
+               ret = sev_es_sync_vmsa(svm);
+               if (ret)
+                       goto e_free;
+
+               /*
+                * The LAUNCH_UPDATE_VMSA command will perform in-place
+                * encryption of the VMSA memory content (i.e. it will write
+                * the same memory region with the guest's key), so invalidate
+                * it first.
+                */
+               clflush_cache_range(svm->vmsa, PAGE_SIZE);
+
+               vmsa->handle = sev->handle;
+               vmsa->address = __sme_pa(svm->vmsa);
+               vmsa->len = PAGE_SIZE;
+               ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
+                                   &argp->error);
+               if (ret)
+                       goto e_free;
+
+               svm->vcpu.arch.guest_state_protected = true;
+       }
+
+e_free:
+       kfree(vmsa);
+       return ret;
+}
+
 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
        void __user *measure = (void __user *)(uintptr_t)argp->data;
@@ -932,7 +1044,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
        struct kvm_sev_cmd sev_cmd;
        int r;
 
-       if (!svm_sev_enabled())
+       if (!svm_sev_enabled() || !sev)
                return -ENOTTY;
 
        if (!argp)
@@ -947,12 +1059,18 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
        case KVM_SEV_INIT:
                r = sev_guest_init(kvm, &sev_cmd);
                break;
+       case KVM_SEV_ES_INIT:
+               r = sev_es_guest_init(kvm, &sev_cmd);
+               break;
        case KVM_SEV_LAUNCH_START:
                r = sev_launch_start(kvm, &sev_cmd);
                break;
        case KVM_SEV_LAUNCH_UPDATE_DATA:
                r = sev_launch_update_data(kvm, &sev_cmd);
                break;
+       case KVM_SEV_LAUNCH_UPDATE_VMSA:
+               r = sev_launch_update_vmsa(kvm, &sev_cmd);
+               break;
        case KVM_SEV_LAUNCH_MEASURE:
                r = sev_launch_measure(kvm, &sev_cmd);
                break;
@@ -1125,49 +1243,61 @@ void sev_vm_destroy(struct kvm *kvm)
        sev_asid_free(sev->asid);
 }
 
-int __init sev_hardware_setup(void)
+void __init sev_hardware_setup(void)
 {
-       struct sev_user_data_status *status;
-       int rc;
+       unsigned int eax, ebx, ecx, edx;
+       bool sev_es_supported = false;
+       bool sev_supported = false;
+
+       /* Does the CPU support SEV? */
+       if (!boot_cpu_has(X86_FEATURE_SEV))
+               goto out;
+
+       /* Retrieve SEV CPUID information */
+       cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
+
+       /* Set encryption bit location for SEV-ES guests */
+       sev_enc_bit = ebx & 0x3f;
 
        /* Maximum number of encrypted guests supported simultaneously */
-       max_sev_asid = cpuid_ecx(0x8000001F);
+       max_sev_asid = ecx;
 
        if (!svm_sev_enabled())
-               return 1;
+               goto out;
 
        /* Minimum ASID value that should be used for SEV guest */
-       min_sev_asid = cpuid_edx(0x8000001F);
+       min_sev_asid = edx;
 
        /* Initialize SEV ASID bitmaps */
        sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
        if (!sev_asid_bitmap)
-               return 1;
+               goto out;
 
        sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
        if (!sev_reclaim_asid_bitmap)
-               return 1;
+               goto out;
 
-       status = kmalloc(sizeof(*status), GFP_KERNEL);
-       if (!status)
-               return 1;
+       pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
+       sev_supported = true;
 
-       /*
-        * Check SEV platform status.
-        *
-        * PLATFORM_STATUS can be called in any state, if we failed to query
-        * the PLATFORM status then either PSP firmware does not support SEV
-        * feature or SEV firmware is dead.
-        */
-       rc = sev_platform_status(status, NULL);
-       if (rc)
-               goto err;
+       /* SEV-ES support requested? */
+       if (!sev_es)
+               goto out;
 
-       pr_info("SEV supported\n");
+       /* Does the CPU support SEV-ES? */
+       if (!boot_cpu_has(X86_FEATURE_SEV_ES))
+               goto out;
 
-err:
-       kfree(status);
-       return rc;
+       /* Has the system been allocated ASIDs for SEV-ES? */
+       if (min_sev_asid == 1)
+               goto out;
+
+       pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
+       sev_es_supported = true;
+
+out:
+       sev = sev_supported;
+       sev_es = sev_es_supported;
 }
 
 void sev_hardware_teardown(void)
@@ -1181,13 +1311,329 @@ void sev_hardware_teardown(void)
        sev_flush_asids();
 }
 
+/*
+ * Pages used by hardware to hold guest encrypted state must be flushed before
+ * returning them to the system.
+ */
+static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
+                                  unsigned long len)
+{
+       /*
+        * If hardware enforced cache coherency for encrypted mappings of the
+        * same physical page is supported, nothing to do.
+        */
+       if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
+               return;
+
+       /*
+        * If the VM Page Flush MSR is supported, use it to flush the page
+        * (using the page virtual address and the guest ASID).
+        */
+       if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
+               struct kvm_sev_info *sev;
+               unsigned long va_start;
+               u64 start, stop;
+
+               /* Align start and stop to page boundaries. */
+               va_start = (unsigned long)va;
+               start = (u64)va_start & PAGE_MASK;
+               stop = PAGE_ALIGN((u64)va_start + len);
+
+               if (start < stop) {
+                       sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+                       while (start < stop) {
+                               wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
+                                      start | sev->asid);
+
+                               start += PAGE_SIZE;
+                       }
+
+                       return;
+               }
+
+               WARN(1, "Address overflow, using WBINVD\n");
+       }
+
+       /*
+        * Hardware should always have one of the above features,
+        * but if not, use WBINVD and issue a warning.
+        */
+       WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
+       wbinvd_on_all_cpus();
+}
+
+void sev_free_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm;
+
+       if (!sev_es_guest(vcpu->kvm))
+               return;
+
+       svm = to_svm(vcpu);
+
+       if (vcpu->arch.guest_state_protected)
+               sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
+       __free_page(virt_to_page(svm->vmsa));
+
+       if (svm->ghcb_sa_free)
+               kfree(svm->ghcb_sa);
+}
+
+static void dump_ghcb(struct vcpu_svm *svm)
+{
+       struct ghcb *ghcb = svm->ghcb;
+       unsigned int nbits;
+
+       /* Re-use the dump_invalid_vmcb module parameter */
+       if (!dump_invalid_vmcb) {
+               pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
+               return;
+       }
+
+       nbits = sizeof(ghcb->save.valid_bitmap) * 8;
+
+       pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
+              ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
+              ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
+              ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
+       pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
+              ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
+       pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+}
+
+static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       struct ghcb *ghcb = svm->ghcb;
+
+       /*
+        * The GHCB protocol so far allows for the following data
+        * to be returned:
+        *   GPRs RAX, RBX, RCX, RDX
+        *
+        * Copy their values to the GHCB if they are dirty.
+        */
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
+               ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
+               ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
+               ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
+       if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
+               ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
+}
+
+static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
+{
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       struct ghcb *ghcb = svm->ghcb;
+       u64 exit_code;
+
+       /*
+        * The GHCB protocol so far allows for the following data
+        * to be supplied:
+        *   GPRs RAX, RBX, RCX, RDX
+        *   XCR0
+        *   CPL
+        *
+        * VMMCALL allows the guest to provide extra registers. KVM also
+        * expects RSI for hypercalls, so include that, too.
+        *
+        * Copy their values to the appropriate location if supplied.
+        */
+       memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+
+       vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
+
+       svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
+
+       if (ghcb_xcr0_is_valid(ghcb)) {
+               vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
+               kvm_update_cpuid_runtime(vcpu);
+       }
+
+       /* Copy the GHCB exit information into the VMCB fields */
+       exit_code = ghcb_get_sw_exit_code(ghcb);
+       control->exit_code = lower_32_bits(exit_code);
+       control->exit_code_hi = upper_32_bits(exit_code);
+       control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
+       control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
+
+       /* Clear the valid entries fields */
+       memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
+
+static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu;
+       struct ghcb *ghcb;
+       u64 exit_code = 0;
+
+       ghcb = svm->ghcb;
+
+       /* Only GHCB Usage code 0 is supported */
+       if (ghcb->ghcb_usage)
+               goto vmgexit_err;
+
+       /*
+        * Retrieve the exit code now even though it may not be marked valid
+        * as it could help with debugging.
+        */
+       exit_code = ghcb_get_sw_exit_code(ghcb);
+
+       if (!ghcb_sw_exit_code_is_valid(ghcb) ||
+           !ghcb_sw_exit_info_1_is_valid(ghcb) ||
+           !ghcb_sw_exit_info_2_is_valid(ghcb))
+               goto vmgexit_err;
+
+       switch (ghcb_get_sw_exit_code(ghcb)) {
+       case SVM_EXIT_READ_DR7:
+               break;
+       case SVM_EXIT_WRITE_DR7:
+               if (!ghcb_rax_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_RDTSC:
+               break;
+       case SVM_EXIT_RDPMC:
+               if (!ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_CPUID:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               if (ghcb_get_rax(ghcb) == 0xd)
+                       if (!ghcb_xcr0_is_valid(ghcb))
+                               goto vmgexit_err;
+               break;
+       case SVM_EXIT_INVD:
+               break;
+       case SVM_EXIT_IOIO:
+               if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
+                       if (!ghcb_sw_scratch_is_valid(ghcb))
+                               goto vmgexit_err;
+               } else {
+                       if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
+                               if (!ghcb_rax_is_valid(ghcb))
+                                       goto vmgexit_err;
+               }
+               break;
+       case SVM_EXIT_MSR:
+               if (!ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               if (ghcb_get_sw_exit_info_1(ghcb)) {
+                       if (!ghcb_rax_is_valid(ghcb) ||
+                           !ghcb_rdx_is_valid(ghcb))
+                               goto vmgexit_err;
+               }
+               break;
+       case SVM_EXIT_VMMCALL:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_cpl_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_RDTSCP:
+               break;
+       case SVM_EXIT_WBINVD:
+               break;
+       case SVM_EXIT_MONITOR:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_rcx_is_valid(ghcb) ||
+                   !ghcb_rdx_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_EXIT_MWAIT:
+               if (!ghcb_rax_is_valid(ghcb) ||
+                   !ghcb_rcx_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_VMGEXIT_MMIO_READ:
+       case SVM_VMGEXIT_MMIO_WRITE:
+               if (!ghcb_sw_scratch_is_valid(ghcb))
+                       goto vmgexit_err;
+               break;
+       case SVM_VMGEXIT_NMI_COMPLETE:
+       case SVM_VMGEXIT_AP_JUMP_TABLE:
+       case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+               break;
+       default:
+               goto vmgexit_err;
+       }
+
+       return 0;
+
+vmgexit_err:
+       vcpu = &svm->vcpu;
+
+       if (ghcb->ghcb_usage) {
+               vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
+                           ghcb->ghcb_usage);
+       } else {
+               vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
+                           exit_code);
+               dump_ghcb(svm);
+       }
+
+       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+       vcpu->run->internal.ndata = 2;
+       vcpu->run->internal.data[0] = exit_code;
+       vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+       return -EINVAL;
+}
+
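+/*
+ * Called before VMRUN. Write any host scratch buffer back to the guest,
+ * sync register state into the GHCB and unmap it so the guest can see
+ * the results of the VMGEXIT.
+ */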
+static void pre_sev_es_run(struct vcpu_svm *svm)
+{
+       if (!svm->ghcb)
+               return;
+
+       if (svm->ghcb_sa_free) {
+               /*
+                * The scratch area lives outside the GHCB, so there is a
+                * buffer that, depending on the operation performed, may
+                * need to be synced, then freed.
+                */
+               if (svm->ghcb_sa_sync) {
+                       kvm_write_guest(svm->vcpu.kvm,
+                                       ghcb_get_sw_scratch(svm->ghcb),
+                                       svm->ghcb_sa, svm->ghcb_sa_len);
+                       svm->ghcb_sa_sync = false;
+               }
+
+               kfree(svm->ghcb_sa);
+               svm->ghcb_sa = NULL;
+               svm->ghcb_sa_free = false;
+       }
+
+       trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
+
+       sev_es_sync_to_ghcb(svm);
+
+       kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
+       svm->ghcb = NULL;
+}
+
 void pre_sev_run(struct vcpu_svm *svm, int cpu)
 {
        struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
        int asid = sev_get_asid(svm->vcpu.kvm);
 
+       /* Perform any SEV-ES pre-run actions */
+       pre_sev_es_run(svm);
+
        /* Assign the asid allocated with this SEV guest */
-       svm->vmcb->control.asid = asid;
+       svm->asid = asid;
 
        /*
         * Flush guest TLB:
@@ -1203,3 +1649,394 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
        svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
        vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
 }
+
+#define GHCB_SCRATCH_AREA_LIMIT                (16ULL * PAGE_SIZE)
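+/*
+ * Resolve the guest-supplied scratch area for a VMGEXIT: either a window
+ * into the GHCB shared buffer, or a size-limited kernel copy of guest
+ * memory that is synced back (and freed) before the next VMRUN.
+ */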
+static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+{
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct ghcb *ghcb = svm->ghcb;
+       u64 ghcb_scratch_beg, ghcb_scratch_end;
+       u64 scratch_gpa_beg, scratch_gpa_end;
+       void *scratch_va;
+
+       scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+       if (!scratch_gpa_beg) {
+               pr_err("vmgexit: scratch gpa not provided\n");
+               return false;
+       }
+
+       scratch_gpa_end = scratch_gpa_beg + len;
+       if (scratch_gpa_end < scratch_gpa_beg) {
+               pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
+                      len, scratch_gpa_beg);
+               return false;
+       }
+
+       if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
+               /* Scratch area begins within GHCB */
+               ghcb_scratch_beg = control->ghcb_gpa +
+                                  offsetof(struct ghcb, shared_buffer);
+               ghcb_scratch_end = control->ghcb_gpa +
+                                  offsetof(struct ghcb, reserved_1);
+
+               /*
+                * If the scratch area begins within the GHCB, it must be
+                * completely contained in the GHCB shared buffer area.
+                */
+               if (scratch_gpa_beg < ghcb_scratch_beg ||
+                   scratch_gpa_end > ghcb_scratch_end) {
+                       pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
+                              scratch_gpa_beg, scratch_gpa_end);
+                       return false;
+               }
+
+               scratch_va = (void *)svm->ghcb;
+               scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
+       } else {
+               /*
+                * The guest memory must be read into a kernel buffer, so
+                * limit the size
+                */
+               if (len > GHCB_SCRATCH_AREA_LIMIT) {
+                       pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
+                              len, GHCB_SCRATCH_AREA_LIMIT);
+                       return false;
+               }
+               scratch_va = kzalloc(len, GFP_KERNEL);
+               if (!scratch_va)
+                       return false;
+
+               if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
+                       /* Unable to copy scratch area from guest */
+                       pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
+
+                       kfree(scratch_va);
+                       return false;
+               }
+
+               /*
+                * The scratch area is outside the GHCB. The operation will
+                * dictate whether the buffer needs to be synced before running
+                * the vCPU next time (i.e. a read was requested so the data
+                * must be written back to the guest memory).
+                */
+               svm->ghcb_sa_sync = sync;
+               svm->ghcb_sa_free = true;
+       }
+
+       svm->ghcb_sa = scratch_va;
+       svm->ghcb_sa_len = len;
+
+       return true;
+}
+
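+/*
+ * GHCB MSR protocol helpers: each request/response is a set of bit
+ * fields packed into the single GHCB MSR value, selected by a field
+ * mask and bit position.
+ */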
+static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
+                             unsigned int pos)
+{
+       svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
+       svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
+}
+
+static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
+{
+       return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
+}
+
+static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
+{
+       svm->vmcb->control.ghcb_gpa = value;
+}
+
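+/*
+ * Handle a VMGEXIT issued through the GHCB MSR protocol, used when the
+ * guest communicates via the GHCB MSR value itself rather than through
+ * a mapped GHCB page.
+ */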
+static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
+{
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       u64 ghcb_info;
+       int ret = 1;
+
+       ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
+
+       trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
+                                            control->ghcb_gpa);
+
+       switch (ghcb_info) {
+       case GHCB_MSR_SEV_INFO_REQ:
+               set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+                                                   GHCB_VERSION_MIN,
+                                                   sev_enc_bit));
+               break;
+       case GHCB_MSR_CPUID_REQ: {
+               u64 cpuid_fn, cpuid_reg, cpuid_value;
+
+               cpuid_fn = get_ghcb_msr_bits(svm,
+                                            GHCB_MSR_CPUID_FUNC_MASK,
+                                            GHCB_MSR_CPUID_FUNC_POS);
+
+               /* Initialize the registers needed by the CPUID intercept */
+               vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
+               vcpu->arch.regs[VCPU_REGS_RCX] = 0;
+
+               ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
+               if (!ret) {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               cpuid_reg = get_ghcb_msr_bits(svm,
+                                             GHCB_MSR_CPUID_REG_MASK,
+                                             GHCB_MSR_CPUID_REG_POS);
+               if (cpuid_reg == 0)
+                       cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
+               else if (cpuid_reg == 1)
+                       cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
+               else if (cpuid_reg == 2)
+                       cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
+               else
+                       cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
+
+               set_ghcb_msr_bits(svm, cpuid_value,
+                                 GHCB_MSR_CPUID_VALUE_MASK,
+                                 GHCB_MSR_CPUID_VALUE_POS);
+
+               set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
+                                 GHCB_MSR_INFO_MASK,
+                                 GHCB_MSR_INFO_POS);
+               break;
+       }
+       case GHCB_MSR_TERM_REQ: {
+               u64 reason_set, reason_code;
+
+               reason_set = get_ghcb_msr_bits(svm,
+                                              GHCB_MSR_TERM_REASON_SET_MASK,
+                                              GHCB_MSR_TERM_REASON_SET_POS);
+               reason_code = get_ghcb_msr_bits(svm,
+                                               GHCB_MSR_TERM_REASON_MASK,
+                                               GHCB_MSR_TERM_REASON_POS);
+               pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
+                       reason_set, reason_code);
+               fallthrough;
+       }
+       default:
+               ret = -EINVAL;
+       }
+
+       trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
+                                           control->ghcb_gpa, ret);
+
+       return ret;
+}
+
+int sev_handle_vmgexit(struct vcpu_svm *svm)
+{
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       u64 ghcb_gpa, exit_code;
+       struct ghcb *ghcb;
+       int ret;
+
+       /* Validate the GHCB */
+       ghcb_gpa = control->ghcb_gpa;
+       if (ghcb_gpa & GHCB_MSR_INFO_MASK)
+               return sev_handle_vmgexit_msr_protocol(svm);
+
+       if (!ghcb_gpa) {
+               vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
+               return -EINVAL;
+       }
+
+       if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+               /* Unable to map GHCB from guest */
+               vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
+                           ghcb_gpa);
+               return -EINVAL;
+       }
+
+       svm->ghcb = svm->ghcb_map.hva;
+       ghcb = svm->ghcb_map.hva;
+
+       trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);
+
+       exit_code = ghcb_get_sw_exit_code(ghcb);
+
+       ret = sev_es_validate_vmgexit(svm);
+       if (ret)
+               return ret;
+
+       sev_es_sync_from_ghcb(svm);
+       ghcb_set_sw_exit_info_1(ghcb, 0);
+       ghcb_set_sw_exit_info_2(ghcb, 0);
+
+       ret = -EINVAL;
+       switch (exit_code) {
+       case SVM_VMGEXIT_MMIO_READ:
+               if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
+                       break;
+
+               ret = kvm_sev_es_mmio_read(&svm->vcpu,
+                                          control->exit_info_1,
+                                          control->exit_info_2,
+                                          svm->ghcb_sa);
+               break;
+       case SVM_VMGEXIT_MMIO_WRITE:
+               if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
+                       break;
+
+               ret = kvm_sev_es_mmio_write(&svm->vcpu,
+                                           control->exit_info_1,
+                                           control->exit_info_2,
+                                           svm->ghcb_sa);
+               break;
+       case SVM_VMGEXIT_NMI_COMPLETE:
+               ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
+               break;
+       case SVM_VMGEXIT_AP_JUMP_TABLE: {
+               struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+               switch (control->exit_info_1) {
+               case 0:
+                       /* Set AP jump table address */
+                       sev->ap_jump_table = control->exit_info_2;
+                       break;
+               case 1:
+                       /* Get AP jump table address */
+                       ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+                       break;
+               default:
+                       pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
+                              control->exit_info_1);
+                       ghcb_set_sw_exit_info_1(ghcb, 1);
+                       ghcb_set_sw_exit_info_2(ghcb,
+                                               X86_TRAP_UD |
+                                               SVM_EVTINJ_TYPE_EXEPT |
+                                               SVM_EVTINJ_VALID);
+               }
+
+               ret = 1;
+               break;
+       }
+       case SVM_VMGEXIT_UNSUPPORTED_EVENT:
+               vcpu_unimpl(&svm->vcpu,
+                           "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
+                           control->exit_info_1, control->exit_info_2);
+               break;
+       default:
+               ret = svm_invoke_exit_handler(svm, exit_code);
+       }
+
+       return ret;
+}
+
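+/*
+ * String I/O for an SEV-ES guest is transferred through the GHCB
+ * scratch area instead of being emulated from guest memory.
+ */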
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
+{
+       if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+               return -EINVAL;
+
+       return kvm_sev_es_string_io(&svm->vcpu, size, port,
+                                   svm->ghcb_sa, svm->ghcb_sa_len, in);
+}
+
+void sev_es_init_vmcb(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+
+       svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
+       svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+
+       /*
+        * An SEV-ES guest requires a VMSA area that is separate from the
+        * VMCB page. Do not include the encryption mask on the VMSA physical
+        * address since hardware will access it using the guest key.
+        */
+       svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
+
+       /* Can't intercept CR register access, HV can't modify CR registers */
+       svm_clr_intercept(svm, INTERCEPT_CR0_READ);
+       svm_clr_intercept(svm, INTERCEPT_CR4_READ);
+       svm_clr_intercept(svm, INTERCEPT_CR8_READ);
+       svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
+       svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
+       svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
+
+       svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);
+
+       /* Track EFER/CR register changes */
+       svm_set_intercept(svm, TRAP_EFER_WRITE);
+       svm_set_intercept(svm, TRAP_CR0_WRITE);
+       svm_set_intercept(svm, TRAP_CR4_WRITE);
+       svm_set_intercept(svm, TRAP_CR8_WRITE);
+
+       /* No support for enable_vmware_backdoor */
+       clr_exception_intercept(svm, GP_VECTOR);
+
+       /* Can't intercept XSETBV, HV can't modify XCR0 directly */
+       svm_clr_intercept(svm, INTERCEPT_XSETBV);
+
+       /* Clear intercepts on selected MSRs */
+       set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+       set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+}
+
+void sev_es_create_vcpu(struct vcpu_svm *svm)
+{
+       /*
+        * Set the GHCB MSR value as per the GHCB specification when creating
+        * a vCPU for an SEV-ES guest.
+        */
+       set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+                                           GHCB_VERSION_MIN,
+                                           sev_enc_bit));
+}
+
+void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu)
+{
+       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       struct vmcb_save_area *hostsa;
+       unsigned int i;
+
+       /*
+        * For an SEV-ES guest, hardware restores the host state on VMEXIT,
+        * one step of which is a VMLOAD. Since hardware does not perform a
+        * VMSAVE on VMRUN, the host save area must be updated here.
+        */
+       asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory");
+
+       /*
+        * Certain MSRs are restored on VMEXIT; only save the ones that
+        * aren't restored.
+        */
+       for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
+               if (host_save_user_msrs[i].sev_es_restored)
+                       continue;
+
+               rdmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
+       }
+
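+       /*
+        * The save-state portion of the host save area begins at offset
+        * 0x400, mirroring the VMCB layout in which the save area follows
+        * the 0x400-byte control area.
+        */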
+       /* XCR0 is restored on VMEXIT, save the current host value */
+       hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
+       hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+
+       /* PKRU is restored on VMEXIT, save the current host value */
+       hostsa->pkru = read_pkru();
+
+       /* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
+       hostsa->xss = host_xss;
+}
+
+void sev_es_vcpu_put(struct vcpu_svm *svm)
+{
+       unsigned int i;
+
+       /*
+        * Certain MSRs are restored by hardware on VMEXIT, having been
+        * saved by the vmsave in sev_es_vcpu_load() above. Only restore
+        * the ones that weren't.
+        */
+       for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
+               if (host_save_user_msrs[i].sev_es_restored)
+                       continue;
+
+               wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
+       }
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index da7eb4a..cce0143 100644
@@ -33,9 +33,9 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
-#include <asm/mce.h>
 #include <asm/spec-ctrl.h>
 #include <asm/cpu_device_id.h>
+#include <asm/traps.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -90,7 +90,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);
 
 static const struct svm_direct_access_msrs {
        u32 index;   /* Index of the MSR */
-       bool always; /* True if intercept is always on */
+       bool always; /* True if intercept is initially cleared */
 } direct_access_msrs[MAX_DIRECT_ACCESS_MSRS] = {
        { .index = MSR_STAR,                            .always = true  },
        { .index = MSR_IA32_SYSENTER_CS,                .always = true  },
@@ -108,6 +108,9 @@ static const struct svm_direct_access_msrs {
        { .index = MSR_IA32_LASTBRANCHTOIP,             .always = false },
        { .index = MSR_IA32_LASTINTFROMIP,              .always = false },
        { .index = MSR_IA32_LASTINTTOIP,                .always = false },
+       { .index = MSR_EFER,                            .always = false },
+       { .index = MSR_IA32_CR_PAT,                     .always = false },
+       { .index = MSR_AMD64_SEV_ES_GHCB,               .always = true  },
        { .index = MSR_INVALID,                         .always = false },
 };
 
@@ -187,10 +190,14 @@ static int vgif = true;
 module_param(vgif, int, 0444);
 
 /* enable/disable SEV support */
-static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
 module_param(sev, int, 0444);
 
-static bool __read_mostly dump_invalid_vmcb = 0;
+/* enable/disable SEV-ES support */
+int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+module_param(sev_es, int, 0444);
+
+bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
 static u8 rsm_ins_bytes[] = "\x0f\xaa";
@@ -336,6 +343,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       /*
+        * SEV-ES does not expose the next RIP. The RIP update is controlled by
+        * the type of exit and the #VC handler in the guest.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               goto done;
+
        if (nrips && svm->vmcb->control.next_rip != 0) {
                WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
                svm->next_rip = svm->vmcb->control.next_rip;
@@ -347,6 +361,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
        } else {
                kvm_rip_write(vcpu, svm->next_rip);
        }
+
+done:
        svm_set_interrupt_shadow(vcpu, 0);
 
        return 1;
@@ -484,7 +500,7 @@ static int svm_hardware_enable(void)
 
        wrmsrl(MSR_EFER, efer | EFER_SVME);
 
-       wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+       wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
 
        if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
                wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
@@ -552,6 +568,7 @@ static int svm_cpu_init(int cpu)
        sd->save_area = alloc_page(GFP_KERNEL);
        if (!sd->save_area)
                goto free_cpu_data;
+       clear_page(page_address(sd->save_area));
 
        if (svm_sev_enabled()) {
                sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
@@ -662,8 +679,8 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
        msrpm[offset] = tmp;
 }
 
-static void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
-                                int read, int write)
+void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
+                         int read, int write)
 {
        set_shadow_msr_intercept(vcpu, msr, read, write);
        set_msr_interception_bitmap(vcpu, msrpm, msr, read, write);
@@ -959,15 +976,11 @@ static __init int svm_hardware_setup(void)
                kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
        }
 
-       if (sev) {
-               if (boot_cpu_has(X86_FEATURE_SEV) &&
-                   IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
-                       r = sev_hardware_setup();
-                       if (r)
-                               sev = false;
-               } else {
-                       sev = false;
-               }
+       if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) {
+               sev_hardware_setup();
+       } else {
+               sev = false;
+               sev_es = false;
        }
 
        svm_adjust_mmio_mask();
@@ -1215,6 +1228,7 @@ static void init_vmcb(struct vcpu_svm *svm)
                save->cr4 = 0;
        }
        svm->asid_generation = 0;
+       svm->asid = 0;
 
        svm->nested.vmcb12_gpa = 0;
        svm->vcpu.arch.hflags = 0;
@@ -1252,6 +1266,11 @@ static void init_vmcb(struct vcpu_svm *svm)
        if (sev_guest(svm->vcpu.kvm)) {
                svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
                clr_exception_intercept(svm, UD_VECTOR);
+
+               if (sev_es_guest(svm->vcpu.kvm)) {
+                       /* Perform SEV-ES specific VMCB updates */
+                       sev_es_init_vmcb(svm);
+               }
        }
 
        vmcb_mark_all_dirty(svm->vmcb);
@@ -1288,6 +1307,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm;
        struct page *vmcb_page;
+       struct page *vmsa_page = NULL;
        int err;
 
        BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1298,9 +1318,27 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        if (!vmcb_page)
                goto out;
 
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               /*
+                * SEV-ES guests require a separate VMSA page used to contain
+                * the encrypted register state of the guest.
+                */
+               vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+               if (!vmsa_page)
+                       goto error_free_vmcb_page;
+
+               /*
+                * SEV-ES guests maintain an encrypted version of their FPU
+                * state which is restored and saved on VMRUN and VMEXIT.
+                * Free the fpu structure to prevent KVM from attempting to
+                * access the FPU state.
+                */
+               kvm_free_guest_fpu(vcpu);
+       }
+
        err = avic_init_vcpu(svm);
        if (err)
-               goto error_free_vmcb_page;
+               goto error_free_vmsa_page;
 
        /* We initialize this flag to true to make sure that the is_running
         * bit would be set the first time the vcpu is loaded.
@@ -1311,21 +1349,32 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        svm->msrpm = svm_vcpu_alloc_msrpm();
        if (!svm->msrpm) {
                err = -ENOMEM;
-               goto error_free_vmcb_page;
+               goto error_free_vmsa_page;
        }
 
        svm_vcpu_init_msrpm(vcpu, svm->msrpm);
 
        svm->vmcb = page_address(vmcb_page);
        svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
+
+       if (vmsa_page)
+               svm->vmsa = page_address(vmsa_page);
+
        svm->asid_generation = 0;
        init_vmcb(svm);
 
        svm_init_osvw(vcpu);
        vcpu->arch.microcode_version = 0x01000065;
 
+       if (sev_es_guest(svm->vcpu.kvm))
+               /* Perform SEV-ES specific VMCB creation updates */
+               sev_es_create_vcpu(svm);
+
        return 0;
 
+error_free_vmsa_page:
+       if (vmsa_page)
+               __free_page(vmsa_page);
 error_free_vmcb_page:
        __free_page(vmcb_page);
 out:
@@ -1353,6 +1402,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 
        svm_free_nested(svm);
 
+       sev_free_vcpu(vcpu);
+
        __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
        __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 }
@@ -1368,15 +1419,20 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                vmcb_mark_all_dirty(svm->vmcb);
        }
 
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               sev_es_vcpu_load(svm, cpu);
+       } else {
 #ifdef CONFIG_X86_64
-       rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
+               rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
 #endif
-       savesegment(fs, svm->host.fs);
-       savesegment(gs, svm->host.gs);
-       svm->host.ldt = kvm_read_ldt();
+               savesegment(fs, svm->host.fs);
+               savesegment(gs, svm->host.gs);
+               svm->host.ldt = kvm_read_ldt();
 
-       for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
-               rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
+               for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
+                       rdmsrl(host_save_user_msrs[i].index,
+                              svm->host_user_msrs[i]);
+       }
 
        if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
                u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
@@ -1404,18 +1460,24 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
        avic_vcpu_put(vcpu);
 
        ++vcpu->stat.host_state_reload;
-       kvm_load_ldt(svm->host.ldt);
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               sev_es_vcpu_put(svm);
+       } else {
+               kvm_load_ldt(svm->host.ldt);
 #ifdef CONFIG_X86_64
-       loadsegment(fs, svm->host.fs);
-       wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
-       load_gs_index(svm->host.gs);
+               loadsegment(fs, svm->host.fs);
+               wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
+               load_gs_index(svm->host.gs);
 #else
 #ifdef CONFIG_X86_32_LAZY_GS
-       loadsegment(gs, svm->host.gs);
+               loadsegment(gs, svm->host.gs);
 #endif
 #endif
-       for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
-               wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
+
+               for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
+                       wrmsrl(host_save_user_msrs[i].index,
+                              svm->host_user_msrs[i]);
+       }
 }
 
 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
@@ -1633,9 +1695,18 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
-       ulong gcr0 = svm->vcpu.arch.cr0;
-       u64 *hcr0 = &svm->vmcb->save.cr0;
+       ulong gcr0;
+       u64 *hcr0;
 
+       /*
+        * SEV-ES guests must always keep the CR intercepts cleared. CR
+        * tracking is done using the CR write traps.
+        */
+       if (sev_es_guest(svm->vcpu.kvm))
+               return;
+
+       gcr0 = svm->vcpu.arch.cr0;
+       hcr0 = &svm->vmcb->save.cr0;
        *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
                | (gcr0 & SVM_CR0_SELECTIVE_MASK);
 
@@ -1655,7 +1726,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-       if (vcpu->arch.efer & EFER_LME) {
+       if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
                if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
                        vcpu->arch.efer |= EFER_LMA;
                        svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
@@ -1684,13 +1755,15 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        update_cr0_intercept(svm);
 }
 
-int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+static bool svm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
-       unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
+       return true;
+}
 
-       if (cr4 & X86_CR4_VMXE)
-               return 1;
+void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+       unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
+       unsigned long old_cr4 = vcpu->arch.cr4;
 
        if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
                svm_flush_tlb(vcpu);
@@ -1701,7 +1774,9 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        cr4 |= host_cr4_mce;
        to_svm(vcpu)->vmcb->save.cr4 = cr4;
        vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
-       return 0;
+
+       if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+               kvm_update_cpuid_runtime(vcpu);
 }
 
 static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -1753,18 +1828,20 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
                ++sd->asid_generation;
                sd->next_asid = sd->min_asid;
                svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
+               vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
        }
 
        svm->asid_generation = sd->asid_generation;
-       svm->vmcb->control.asid = sd->next_asid++;
-
-       vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
+       svm->asid = sd->next_asid++;
 }
 
 static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
 {
        struct vmcb *vmcb = svm->vmcb;
 
+       if (svm->vcpu.arch.guest_state_protected)
+               return;
+
        if (unlikely(value != vmcb->save.dr6)) {
                vmcb->save.dr6 = value;
                vmcb_mark_dirty(vmcb, VMCB_DR);
@@ -1775,6 +1852,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        get_debugreg(vcpu->arch.db[0], 0);
        get_debugreg(vcpu->arch.db[1], 1);
        get_debugreg(vcpu->arch.db[2], 2);
@@ -1793,6 +1873,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        svm->vmcb->save.dr7 = value;
        vmcb_mark_dirty(svm->vmcb, VMCB_DR);
 }
@@ -1931,25 +2014,6 @@ static bool is_erratum_383(void)
        return true;
 }
 
-/*
- * Trigger machine check on the host. We assume all the MSRs are already set up
- * by the CPU and that we still run on the same CPU as the MCE occurred on.
- * We pass a fake environment to the machine check handler because we want
- * the guest to be always treated like user space, no matter what context
- * it used internally.
- */
-static void kvm_machine_check(void)
-{
-#if defined(CONFIG_X86_MCE)
-       struct pt_regs regs = {
-               .cs = 3, /* Fake ring 3 no matter what the guest ran on */
-               .flags = X86_EFLAGS_IF,
-       };
-
-       do_machine_check(&regs);
-#endif
-}
-
 static void svm_handle_mce(struct vcpu_svm *svm)
 {
        if (is_erratum_383()) {
@@ -1981,6 +2045,13 @@ static int shutdown_interception(struct vcpu_svm *svm)
        struct kvm_run *kvm_run = svm->vcpu.run;
 
        /*
+        * The VM save area has already been encrypted so it
+        * cannot be reinitialized - just terminate.
+        */
+       if (sev_es_guest(svm->vcpu.kvm))
+               return -EINVAL;
+
+       /*
         * VMCB is undefined after a SHUTDOWN intercept
         * so reinitialize it.
         */
@@ -2001,11 +2072,16 @@ static int io_interception(struct vcpu_svm *svm)
        ++svm->vcpu.stat.io_exits;
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-       if (string)
-               return kvm_emulate_instruction(vcpu, 0);
-
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
+
+       if (string) {
+               if (sev_es_guest(vcpu->kvm))
+                       return sev_es_string_io(svm, size, port, in);
+               else
+                       return kvm_emulate_instruction(vcpu, 0);
+       }
+
        svm->next_rip = svm->vmcb->control.exit_info_2;
 
        return kvm_fast_pio(&svm->vcpu, size, port, in);
@@ -2269,9 +2345,11 @@ static int cpuid_interception(struct vcpu_svm *svm)
 static int iret_interception(struct vcpu_svm *svm)
 {
        ++svm->vcpu.stat.nmi_window_exits;
-       svm_clr_intercept(svm, INTERCEPT_IRET);
        svm->vcpu.arch.hflags |= HF_IRET_MASK;
-       svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+       if (!sev_es_guest(svm->vcpu.kvm)) {
+               svm_clr_intercept(svm, INTERCEPT_IRET);
+               svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+       }
        kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
        return 1;
 }
@@ -2408,6 +2486,41 @@ static int cr_interception(struct vcpu_svm *svm)
        return kvm_complete_insn_gp(&svm->vcpu, err);
 }
 
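+/*
+ * Handle a CR0/CR4/CR8 write trap. For SEV-ES guests the CR write
+ * intercepts are replaced by post-write traps, with exit_info_1
+ * holding the value that was written.
+ */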
+static int cr_trap(struct vcpu_svm *svm)
+{
+       struct kvm_vcpu *vcpu = &svm->vcpu;
+       unsigned long old_value, new_value;
+       unsigned int cr;
+       int ret = 0;
+
+       new_value = (unsigned long)svm->vmcb->control.exit_info_1;
+
+       cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
+       switch (cr) {
+       case 0:
+               old_value = kvm_read_cr0(vcpu);
+               svm_set_cr0(vcpu, new_value);
+
+               kvm_post_set_cr0(vcpu, old_value, new_value);
+               break;
+       case 4:
+               old_value = kvm_read_cr4(vcpu);
+               svm_set_cr4(vcpu, new_value);
+
+               kvm_post_set_cr4(vcpu, old_value, new_value);
+               break;
+       case 8:
+               ret = kvm_set_cr8(&svm->vcpu, new_value);
+               break;
+       default:
+               WARN(1, "unhandled CR%d write trap", cr);
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       return kvm_complete_insn_gp(vcpu, ret);
+}
+
 static int dr_interception(struct vcpu_svm *svm)
 {
        int reg, dr;
@@ -2461,6 +2574,25 @@ static int cr8_write_interception(struct vcpu_svm *svm)
        return 0;
 }
 
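+/*
+ * Handle an EFER write trap. SEV-ES guests track EFER changes via the
+ * TRAP_EFER_WRITE intercept, with the new value in exit_info_1.
+ */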
+static int efer_trap(struct vcpu_svm *svm)
+{
+       struct msr_data msr_info;
+       int ret;
+
+       /*
+        * Clear the EFER_SVME bit from EFER. The SVM code always sets this
+        * bit in svm_set_efer(), but __kvm_valid_efer() checks it against
+        * whether the guest has X86_FEATURE_SVM - this avoids a failure if
+        * the guest doesn't have X86_FEATURE_SVM.
+        */
+       msr_info.host_initiated = false;
+       msr_info.index = MSR_EFER;
+       msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME;
+       ret = kvm_set_msr_common(&svm->vcpu, &msr_info);
+
+       return kvm_complete_insn_gp(&svm->vcpu, ret);
+}
+
 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 {
        msr->data = 0;
@@ -2543,10 +2675,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+                   !guest_has_spec_ctrl_msr(vcpu))
                        return 1;
 
                msr_info->data = svm->spec_ctrl;
@@ -2584,6 +2713,20 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        return 0;
 }
 
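+/*
+ * For an SEV-ES guest, a failed MSR access cannot be completed by
+ * emulation; report a #GP back to the guest through the GHCB exit
+ * info fields instead.
+ */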
+static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       if (!sev_es_guest(svm->vcpu.kvm) || !err)
+               return kvm_complete_insn_gp(&svm->vcpu, err);
+
+       ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+       ghcb_set_sw_exit_info_2(svm->ghcb,
+                               X86_TRAP_GP |
+                               SVM_EVTINJ_TYPE_EXEPT |
+                               SVM_EVTINJ_VALID);
+       return 1;
+}
+
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
        return kvm_emulate_rdmsr(&svm->vcpu);
@@ -2630,10 +2773,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
+                   !guest_has_spec_ctrl_msr(vcpu))
                        return 1;
 
                if (kvm_spec_ctrl_test_value(data))
@@ -2658,12 +2798,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                break;
        case MSR_IA32_PRED_CMD:
                if (!msr->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
+                   !guest_has_pred_cmd_msr(vcpu))
                        return 1;
 
                if (data & ~PRED_CMD_IBPB)
                        return 1;
-               if (!boot_cpu_has(X86_FEATURE_AMD_IBPB))
+               if (!boot_cpu_has(X86_FEATURE_IBPB))
                        return 1;
                if (!data)
                        break;
@@ -2805,7 +2945,14 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
 static int pause_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
-       bool in_kernel = (svm_get_cpl(vcpu) == 0);
+       bool in_kernel;
+
+       /*
+        * CPL is not made available for an SEV-ES guest, therefore
+        * vcpu->arch.preempted_in_kernel can never be true.  Just
+        * set in_kernel to false as well.
+        */
+       in_kernel = !sev_es_guest(svm->vcpu.kvm) && svm_get_cpl(vcpu) == 0;
 
        if (!kvm_pause_in_guest(vcpu->kvm))
                grow_ple_window(vcpu);
@@ -2920,11 +3067,16 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
        [SVM_EXIT_RDPRU]                        = rdpru_interception,
+       [SVM_EXIT_EFER_WRITE_TRAP]              = efer_trap,
+       [SVM_EXIT_CR0_WRITE_TRAP]               = cr_trap,
+       [SVM_EXIT_CR4_WRITE_TRAP]               = cr_trap,
+       [SVM_EXIT_CR8_WRITE_TRAP]               = cr_trap,
        [SVM_EXIT_INVPCID]                      = invpcid_interception,
        [SVM_EXIT_NPF]                          = npf_interception,
        [SVM_EXIT_RSM]                          = rsm_interception,
        [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
        [SVM_EXIT_AVIC_UNACCELERATED_ACCESS]    = avic_unaccelerated_access_interception,
+       [SVM_EXIT_VMGEXIT]                      = sev_handle_vmgexit,
 };
 
 static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -2966,6 +3118,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
        pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
        pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
        pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
+       pr_err("%-20s%016llx\n", "ghcb:", control->ghcb_gpa);
        pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
        pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
        pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
@@ -2973,6 +3126,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
        pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
        pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
        pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
+       pr_err("%-20s%016llx\n", "vmsa_pa:", control->vmsa_pa);
        pr_err("VMCB State Save Area:\n");
        pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
               "es:",
@@ -3045,6 +3199,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
               "excp_to:", save->last_excp_to);
 }
 
+static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+{
+       if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+           svm_exit_handlers[exit_code])
+               return 0;
+
+       vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
+       dump_vmcb(vcpu);
+       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+       vcpu->run->internal.ndata = 2;
+       vcpu->run->internal.data[0] = exit_code;
+       vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+       return -EINVAL;
+}
+
+int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code)
+{
+       if (svm_handle_invalid_exit(&svm->vcpu, exit_code))
+               return 0;
+
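+       /*
+        * When retpolines are in use, direct-call the most common exit
+        * handlers instead of dispatching through the indirect-call table.
+        */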
+#ifdef CONFIG_RETPOLINE
+       if (exit_code == SVM_EXIT_MSR)
+               return msr_interception(svm);
+       else if (exit_code == SVM_EXIT_VINTR)
+               return interrupt_window_interception(svm);
+       else if (exit_code == SVM_EXIT_INTR)
+               return intr_interception(svm);
+       else if (exit_code == SVM_EXIT_HLT)
+               return halt_interception(svm);
+       else if (exit_code == SVM_EXIT_NPF)
+               return npf_interception(svm);
+#endif
+       return svm_exit_handlers[exit_code](svm);
+}
+
 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
                              u32 *intr_info, u32 *error_code)
 {
@@ -3068,10 +3259,13 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 
        trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
 
-       if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
-               vcpu->arch.cr0 = svm->vmcb->save.cr0;
-       if (npt_enabled)
-               vcpu->arch.cr3 = svm->vmcb->save.cr3;
+       /* SEV-ES guests must use the CR write traps to track CR registers. */
+       if (!sev_es_guest(vcpu->kvm)) {
+               if (!svm_is_intercept(svm, INTERCEPT_CR0_WRITE))
+                       vcpu->arch.cr0 = svm->vmcb->save.cr0;
+               if (npt_enabled)
+                       vcpu->arch.cr3 = svm->vmcb->save.cr3;
+       }
 
        if (is_guest_mode(vcpu)) {
                int vmexit;
@@ -3108,32 +3302,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
        if (exit_fastpath != EXIT_FASTPATH_NONE)
                return 1;
 
-       if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
-           || !svm_exit_handlers[exit_code]) {
-               vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
-               dump_vmcb(vcpu);
-               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-               vcpu->run->internal.suberror =
-                       KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
-               vcpu->run->internal.ndata = 2;
-               vcpu->run->internal.data[0] = exit_code;
-               vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
-               return 0;
-       }
-
-#ifdef CONFIG_RETPOLINE
-       if (exit_code == SVM_EXIT_MSR)
-               return msr_interception(svm);
-       else if (exit_code == SVM_EXIT_VINTR)
-               return interrupt_window_interception(svm);
-       else if (exit_code == SVM_EXIT_INTR)
-               return intr_interception(svm);
-       else if (exit_code == SVM_EXIT_HLT)
-               return halt_interception(svm);
-       else if (exit_code == SVM_EXIT_NPF)
-               return npf_interception(svm);
-#endif
-       return svm_exit_handlers[exit_code](svm);
+       return svm_invoke_exit_handler(svm, exit_code);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
@@ -3162,7 +3331,8 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 
        svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
        vcpu->arch.hflags |= HF_NMI_MASK;
-       svm_set_intercept(svm, INTERCEPT_IRET);
+       if (!sev_es_guest(svm->vcpu.kvm))
+               svm_set_intercept(svm, INTERCEPT_IRET);
        ++vcpu->stat.nmi_injections;
 }
 
@@ -3183,6 +3353,13 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       /*
+        * SEV-ES guests must always keep the CR intercepts cleared. CR
+        * tracking is done using the CR write traps.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return;
+
        if (nested_svm_virtualize_tpr(vcpu))
                return;
 
@@ -3239,10 +3416,12 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 
        if (masked) {
                svm->vcpu.arch.hflags |= HF_NMI_MASK;
-               svm_set_intercept(svm, INTERCEPT_IRET);
+               if (!sev_es_guest(svm->vcpu.kvm))
+                       svm_set_intercept(svm, INTERCEPT_IRET);
        } else {
                svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
-               svm_clr_intercept(svm, INTERCEPT_IRET);
+               if (!sev_es_guest(svm->vcpu.kvm))
+                       svm_clr_intercept(svm, INTERCEPT_IRET);
        }
 }
 
@@ -3254,7 +3433,14 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
        if (!gif_set(svm))
                return true;
 
-       if (is_guest_mode(vcpu)) {
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               /*
+                * SEV-ES guests do not expose RFLAGS. Use the VMCB interrupt mask
+                * bit to determine the state of the IF flag.
+                */
+               if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
+                       return true;
+       } else if (is_guest_mode(vcpu)) {
                /* As long as interrupts are being delivered...  */
                if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
                    ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF)
@@ -3413,8 +3599,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
         * If we've made progress since setting HF_IRET_MASK, we've
         * executed an IRET and can allow NMI injection.
         */
-       if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
-           && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
+       if ((svm->vcpu.arch.hflags & HF_IRET_MASK) &&
+           (sev_es_guest(svm->vcpu.kvm) ||
+            kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip)) {
                svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
                kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
        }
@@ -3437,6 +3624,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
                break;
        case SVM_EXITINTINFO_TYPE_EXEPT:
                /*
+                * Never re-inject a #VC exception.
+                */
+               if (vector == X86_TRAP_VC)
+                       break;
+
+               /*
                 * In case of software exceptions, do not reinject the vector,
                 * but re-execute the instruction instead. Rewind RIP first
                 * if we emulated INT3 before.
@@ -3509,16 +3702,20 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
        guest_enter_irqoff();
        lockdep_hardirqs_on(CALLER_ADDR0);
 
-       __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               __svm_sev_es_vcpu_run(svm->vmcb_pa);
+       } else {
+               __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
 
 #ifdef CONFIG_X86_64
-       native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+               native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else
-       loadsegment(fs, svm->host.fs);
+               loadsegment(fs, svm->host.fs);
 #ifndef CONFIG_X86_32_LAZY_GS
-       loadsegment(gs, svm->host.gs);
+               loadsegment(gs, svm->host.gs);
 #endif
 #endif
+       }
 
        /*
         * VMEXIT disables interrupts (host state), but tracing and lockdep
@@ -3568,6 +3765,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 
        sync_lapic_to_cr8(vcpu);
 
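+       /*
+        * Sync the shadow svm->asid into the VMCB, marking the VMCB
+        * dirty only if the ASID actually changed.
+        */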
+       if (unlikely(svm->asid != svm->vmcb->control.asid)) {
+               svm->vmcb->control.asid = svm->asid;
+               vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
+       }
        svm->vmcb->save.cr2 = vcpu->arch.cr2;
 
        /*
@@ -3612,14 +3813,17 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
        if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
                svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
-       reload_tss(vcpu);
+       if (!sev_es_guest(svm->vcpu.kvm))
+               reload_tss(vcpu);
 
        x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
 
-       vcpu->arch.cr2 = svm->vmcb->save.cr2;
-       vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
-       vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
-       vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
+       if (!sev_es_guest(svm->vcpu.kvm)) {
+               vcpu->arch.cr2 = svm->vmcb->save.cr2;
+               vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
+               vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
+               vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
+       }
 
        if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
                kvm_before_interrupt(&svm->vcpu);
@@ -3722,12 +3926,21 @@ static bool svm_cpu_has_accelerated_tpr(void)
        return false;
 }
 
-static bool svm_has_emulated_msr(u32 index)
+/*
+ * The kvm parameter can be NULL (module initialization, or invocation before
+ * VM creation). Be sure to check the kvm parameter before using it.
+ */
+static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
 {
        switch (index) {
        case MSR_IA32_MCG_EXT_CTL:
        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
                return false;
+       case MSR_IA32_SMBASE:
+               /* SEV-ES guests do not support SMM, so report false */
+               if (kvm && sev_es_guest(kvm))
+                       return false;
+               break;
        default:
                break;
        }
@@ -4086,6 +4299,12 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i
        unsigned long cr4;
 
        /*
+        * Instruction emulation is not possible for an SEV-ES guest.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return false;
+
+       /*
         * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
         *
         * Errata:
@@ -4217,6 +4436,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .get_cpl = svm_get_cpl,
        .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
        .set_cr0 = svm_set_cr0,
+       .is_valid_cr4 = svm_is_valid_cr4,
        .set_cr4 = svm_set_cr4,
        .set_efer = svm_set_efer,
        .get_idt = svm_get_idt,
@@ -4305,6 +4525,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
        .msr_filter_changed = svm_msr_filter_changed,
+       .complete_emulated_msr = svm_complete_emulated_msr,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 1d853fe..5431e63 100644
 
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
+#include <linux/bits.h>
 
 #include <asm/svm.h>
 
-static const u32 host_save_user_msrs[] = {
+#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+
+static const struct svm_host_save_msrs {
+       u32 index;              /* Index of the MSR */
+       bool sev_es_restored;   /* True if MSR is restored on SEV-ES VMEXIT */
+} host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
-       MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
-       MSR_FS_BASE,
+       { .index = MSR_STAR,                    .sev_es_restored = true },
+       { .index = MSR_LSTAR,                   .sev_es_restored = true },
+       { .index = MSR_CSTAR,                   .sev_es_restored = true },
+       { .index = MSR_SYSCALL_MASK,            .sev_es_restored = true },
+       { .index = MSR_KERNEL_GS_BASE,          .sev_es_restored = true },
+       { .index = MSR_FS_BASE,                 .sev_es_restored = true },
 #endif
-       MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
-       MSR_TSC_AUX,
+       { .index = MSR_IA32_SYSENTER_CS,        .sev_es_restored = true },
+       { .index = MSR_IA32_SYSENTER_ESP,       .sev_es_restored = true },
+       { .index = MSR_IA32_SYSENTER_EIP,       .sev_es_restored = true },
+       { .index = MSR_TSC_AUX,                 .sev_es_restored = false },
 };
-
 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
 
-#define MAX_DIRECT_ACCESS_MSRS 15
+#define MAX_DIRECT_ACCESS_MSRS 18
 #define MSRPM_OFFSETS  16
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
@@ -61,11 +72,13 @@ enum {
 
 struct kvm_sev_info {
        bool active;            /* SEV enabled guest */
+       bool es_active;         /* SEV-ES enabled guest */
        unsigned int asid;      /* ASID used for this guest */
        unsigned int handle;    /* SEV firmware handle */
        int fd;                 /* SEV device fd */
        unsigned long pages_locked; /* Number of pages locked */
        struct list_head regions_list;  /* List of registered regions */
+       u64 ap_jump_table;      /* SEV-ES AP Jump Table address */
 };
 
 struct kvm_svm {
@@ -106,6 +119,7 @@ struct vcpu_svm {
        struct vmcb *vmcb;
        unsigned long vmcb_pa;
        struct svm_cpu_data *svm_data;
+       u32 asid;
        uint64_t asid_generation;
        uint64_t sysenter_esp;
        uint64_t sysenter_eip;
@@ -166,6 +180,17 @@ struct vcpu_svm {
                DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS);
                DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
        } shadow_msr_intercept;
+
+       /* SEV-ES support */
+       struct vmcb_save_area *vmsa;
+       struct ghcb *ghcb;
+       struct kvm_host_map ghcb_map;
+
+       /* SEV-ES scratch area support */
+       void *ghcb_sa;
+       u64 ghcb_sa_len;
+       bool ghcb_sa_sync;
+       bool ghcb_sa_free;
 };
 
 struct svm_cpu_data {
@@ -193,6 +218,28 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
        return container_of(kvm, struct kvm_svm, kvm);
 }
 
+static inline bool sev_guest(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+       return sev->active;
+#else
+       return false;
+#endif
+}
+
+static inline bool sev_es_guest(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+       return sev_guest(kvm) && sev->es_active;
+#else
+       return false;
+#endif
+}
+
 static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
 {
        vmcb->control.clean = 0;
@@ -244,21 +291,24 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm)
 {
        struct vmcb *vmcb = get_host_vmcb(svm);
 
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+       if (!sev_es_guest(svm->vcpu.kvm)) {
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+       }
+
        vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
        vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
 
        recalc_intercepts(svm);
@@ -270,6 +320,12 @@ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
 
        vmcb->control.intercepts[INTERCEPT_DR] = 0;
 
+       /* DR7 access must remain intercepted for an SEV-ES guest */
+       if (sev_es_guest(svm->vcpu.kvm)) {
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+       }
+
        recalc_intercepts(svm);
 }
 
@@ -351,6 +407,10 @@ static inline bool gif_set(struct vcpu_svm *svm)
 #define MSR_CR3_LONG_MBZ_MASK                  0xfff0000000000000U
 #define MSR_INVALID                            0xffffffffU
 
+extern int sev;
+extern int sev_es;
+extern bool dump_invalid_vmcb;
+
 u32 svm_msrpm_offset(u32 msr);
 u32 *svm_vcpu_alloc_msrpm(void);
 void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
@@ -358,13 +418,16 @@ void svm_vcpu_free_msrpm(u32 *msrpm);
 
 int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void svm_flush_tlb(struct kvm_vcpu *vcpu);
 void disable_nmi_singlestep(struct vcpu_svm *svm);
 bool svm_smi_blocked(struct kvm_vcpu *vcpu);
 bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
 bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
 void svm_set_gif(struct vcpu_svm *svm, bool value);
+int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code);
+void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
+                         int read, int write);
 
 /* nested.c */
 
@@ -470,18 +533,42 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 
 /* sev.c */
 
-extern unsigned int max_sev_asid;
+#define GHCB_VERSION_MAX               1ULL
+#define GHCB_VERSION_MIN               1ULL
+
+#define GHCB_MSR_INFO_POS              0
+#define GHCB_MSR_INFO_MASK             (BIT_ULL(12) - 1)
+
+#define GHCB_MSR_SEV_INFO_RESP         0x001
+#define GHCB_MSR_SEV_INFO_REQ          0x002
+#define GHCB_MSR_VER_MAX_POS           48
+#define GHCB_MSR_VER_MAX_MASK          0xffff
+#define GHCB_MSR_VER_MIN_POS           32
+#define GHCB_MSR_VER_MIN_MASK          0xffff
+#define GHCB_MSR_CBIT_POS              24
+#define GHCB_MSR_CBIT_MASK             0xff
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)                           \
+       ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |   \
+        (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |   \
+        (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |        \
+        GHCB_MSR_SEV_INFO_RESP)
+
+#define GHCB_MSR_CPUID_REQ             0x004
+#define GHCB_MSR_CPUID_RESP            0x005
+#define GHCB_MSR_CPUID_FUNC_POS                32
+#define GHCB_MSR_CPUID_FUNC_MASK       0xffffffff
+#define GHCB_MSR_CPUID_VALUE_POS       32
+#define GHCB_MSR_CPUID_VALUE_MASK      0xffffffff
+#define GHCB_MSR_CPUID_REG_POS         30
+#define GHCB_MSR_CPUID_REG_MASK                0x3
+
+#define GHCB_MSR_TERM_REQ              0x100
+#define GHCB_MSR_TERM_REASON_SET_POS   12
+#define GHCB_MSR_TERM_REASON_SET_MASK  0xf
+#define GHCB_MSR_TERM_REASON_POS       16
+#define GHCB_MSR_TERM_REASON_MASK      0xff
 
-static inline bool sev_guest(struct kvm *kvm)
-{
-#ifdef CONFIG_KVM_AMD_SEV
-       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-
-       return sev->active;
-#else
-       return false;
-#endif
-}
+extern unsigned int max_sev_asid;
 
 static inline bool svm_sev_enabled(void)
 {
@@ -495,7 +582,19 @@ int svm_register_enc_region(struct kvm *kvm,
 int svm_unregister_enc_region(struct kvm *kvm,
                              struct kvm_enc_region *range);
 void pre_sev_run(struct vcpu_svm *svm, int cpu);
-int __init sev_hardware_setup(void);
+void __init sev_hardware_setup(void);
 void sev_hardware_teardown(void);
+void sev_free_vcpu(struct kvm_vcpu *vcpu);
+int sev_handle_vmgexit(struct vcpu_svm *svm);
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
+void sev_es_init_vmcb(struct vcpu_svm *svm);
+void sev_es_create_vcpu(struct vcpu_svm *svm);
+void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu);
+void sev_es_vcpu_put(struct vcpu_svm *svm);
+
+/* vmenter.S */
+
+void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);
+void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
 
 #endif
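
The GHCB MSR protocol constants above pack a request/response code into bits 11:0 and the payload into the upper bits. A short sketch of how a hypervisor might compose the SEV_INFO response and decode an incoming request; the helper names and the enc_bit parameter are illustrative assumptions:

static u64 ghcb_msr_sev_info_sketch(u8 enc_bit)
{
        /* Advertise the supported GHCB version range and the C-bit position */
        return GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, GHCB_VERSION_MIN, enc_bit);
}

static u64 ghcb_msr_info_sketch(u64 ghcb_msr)
{
        /* Bits 11:0 carry the request/response code */
        return (ghcb_msr >> GHCB_MSR_INFO_POS) & GHCB_MSR_INFO_MASK;
}
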
index 1ec1ac4..6feb8c0 100644 (file)
@@ -168,3 +168,53 @@ SYM_FUNC_START(__svm_vcpu_run)
        pop %_ASM_BP
        ret
 SYM_FUNC_END(__svm_vcpu_run)
+
+/**
+ * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
+ * @vmcb_pa:   unsigned long physical address of the guest VMCB
+ */
+SYM_FUNC_START(__svm_sev_es_vcpu_run)
+       push %_ASM_BP
+#ifdef CONFIG_X86_64
+       push %r15
+       push %r14
+       push %r13
+       push %r12
+#else
+       push %edi
+       push %esi
+#endif
+       push %_ASM_BX
+
+       /* Enter guest mode */
+       mov %_ASM_ARG1, %_ASM_AX
+       sti
+
+1:     vmrun %_ASM_AX
+       jmp 3f
+2:     cmpb $0, kvm_rebooting
+       jne 3f
+       ud2
+       _ASM_EXTABLE(1b, 2b)
+
+3:     cli
+
+#ifdef CONFIG_RETPOLINE
+       /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+       FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+#endif
+
+       pop %_ASM_BX
+
+#ifdef CONFIG_X86_64
+       pop %r12
+       pop %r13
+       pop %r14
+       pop %r15
+#else
+       pop %esi
+       pop %edi
+#endif
+       pop %_ASM_BP
+       ret
+SYM_FUNC_END(__svm_sev_es_vcpu_run)
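
Unlike __svm_vcpu_run(), the SEV-ES stub takes no GPR array: the hardware saves and restores guest registers in the encrypted VMSA. A sketch of how the run path might dispatch between the two stubs; the wrapper name is illustrative, not the series' exact code:

static void svm_vcpu_enter_exit_sketch(struct vcpu_svm *svm)
{
        if (sev_es_guest(svm->vcpu.kvm))
                __svm_sev_es_vcpu_run(svm->vmcb_pa);
        else
                __svm_vcpu_run(svm->vmcb_pa,
                               (unsigned long *)&svm->vcpu.arch.regs);
}
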
index aef960f..2de30c2 100644 (file)
@@ -1578,6 +1578,103 @@ TRACE_EVENT(kvm_hv_syndbg_get_msr,
                  __entry->vcpu_id, __entry->vp_index, __entry->msr,
                  __entry->data)
 );
+
+/*
+ * Tracepoint for the start of VMGEXIT processing
+ */
+TRACE_EVENT(kvm_vmgexit_enter,
+       TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb),
+       TP_ARGS(vcpu_id, ghcb),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, vcpu_id)
+               __field(u64, exit_reason)
+               __field(u64, info1)
+               __field(u64, info2)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id     = vcpu_id;
+               __entry->exit_reason = ghcb->save.sw_exit_code;
+               __entry->info1       = ghcb->save.sw_exit_info_1;
+               __entry->info2       = ghcb->save.sw_exit_info_2;
+       ),
+
+       TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx",
+                 __entry->vcpu_id, __entry->exit_reason,
+                 __entry->info1, __entry->info2)
+);
+
+/*
+ * Tracepoint for the end of VMGEXIT processing
+ */
+TRACE_EVENT(kvm_vmgexit_exit,
+       TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb),
+       TP_ARGS(vcpu_id, ghcb),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, vcpu_id)
+               __field(u64, exit_reason)
+               __field(u64, info1)
+               __field(u64, info2)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id     = vcpu_id;
+               __entry->exit_reason = ghcb->save.sw_exit_code;
+               __entry->info1       = ghcb->save.sw_exit_info_1;
+               __entry->info2       = ghcb->save.sw_exit_info_2;
+       ),
+
+       TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx",
+                 __entry->vcpu_id, __entry->exit_reason,
+                 __entry->info1, __entry->info2)
+);
+
+/*
+ * Tracepoint for the start of VMGEXIT MSR protocol processing
+ */
+TRACE_EVENT(kvm_vmgexit_msr_protocol_enter,
+       TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa),
+       TP_ARGS(vcpu_id, ghcb_gpa),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, vcpu_id)
+               __field(u64, ghcb_gpa)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu_id;
+               __entry->ghcb_gpa = ghcb_gpa;
+       ),
+
+       TP_printk("vcpu %u, ghcb_gpa %016llx",
+                 __entry->vcpu_id, __entry->ghcb_gpa)
+);
+
+/*
+ * Tracepoint for the end of VMGEXIT MSR protocol processing
+ */
+TRACE_EVENT(kvm_vmgexit_msr_protocol_exit,
+       TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa, int result),
+       TP_ARGS(vcpu_id, ghcb_gpa, result),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, vcpu_id)
+               __field(u64, ghcb_gpa)
+               __field(int, result)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu_id;
+               __entry->ghcb_gpa = ghcb_gpa;
+               __entry->result   = result;
+       ),
+
+       TP_printk("vcpu %u, ghcb_gpa %016llx, result %d",
+                 __entry->vcpu_id, __entry->ghcb_gpa, __entry->result)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
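
For reference, a sketch of where the enter/exit pair might fire around VMGEXIT dispatch; the handler body is illustrative and only the trace calls correspond to the events defined above:

static int sev_handle_vmgexit_sketch(struct vcpu_svm *svm)
{
        struct ghcb *ghcb = svm->ghcb;
        int ret;

        trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);

        ret = svm_invoke_exit_handler(svm, ghcb->save.sw_exit_code);

        trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, ghcb);
        return ret;
}
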
index f3199bb..41f2466 100644 (file)
@@ -326,7 +326,6 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
 
 uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
 {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
        /*
         * vmcs_version represents the range of supported Enlightened VMCS
         * versions: lower 8 bits is the minimal version, higher 8 bits is the
@@ -334,7 +333,7 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
         * KVM_EVMCS_VERSION.
         */
        if (kvm_cpu_cap_get(X86_FEATURE_VMX) &&
-           vmx->nested.enlightened_vmcs_enabled)
+           (!vcpu || to_vmx(vcpu)->nested.enlightened_vmcs_enabled))
                return (KVM_EVMCS_VERSION << 8) | 1;
 
        return 0;
index 89af692..e2f2656 100644 (file)
@@ -2952,7 +2952,8 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
 static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
 {
        if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
-              vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT))
+              vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT &&
+              vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI))
                return -EINVAL;
 
        return 0;
@@ -3559,19 +3560,29 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
         */
        nested_cache_shadow_vmcs12(vcpu, vmcs12);
 
-       /*
-        * If we're entering a halted L2 vcpu and the L2 vcpu won't be
-        * awakened by event injection or by an NMI-window VM-exit or
-        * by an interrupt-window VM-exit, halt the vcpu.
-        */
-       if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
-           !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
-           !(vmcs12->cpu_based_vm_exec_control & CPU_BASED_NMI_WINDOW_EXITING) &&
-           !((vmcs12->cpu_based_vm_exec_control & CPU_BASED_INTR_WINDOW_EXITING) &&
-             (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
+       switch (vmcs12->guest_activity_state) {
+       case GUEST_ACTIVITY_HLT:
+               /*
+                * If we're entering a halted L2 vcpu and the L2 vcpu won't be
+                * awakened by event injection or by an NMI-window VM-exit or
+                * by an interrupt-window VM-exit, halt the vcpu.
+                */
+               if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
+                   !nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) &&
+                   !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
+                     (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
+                       vmx->nested.nested_run_pending = 0;
+                       return kvm_vcpu_halt(vcpu);
+               }
+               break;
+       case GUEST_ACTIVITY_WAIT_SIPI:
                vmx->nested.nested_run_pending = 0;
-               return kvm_vcpu_halt(vcpu);
+               vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+               break;
+       default:
+               break;
        }
+
        return 1;
 
 vmentry_failed:
@@ -3797,7 +3808,20 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
                        return -EBUSY;
                nested_vmx_update_pending_dbg(vcpu);
                clear_bit(KVM_APIC_INIT, &apic->pending_events);
-               nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
+               if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED)
+                       nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
+               return 0;
+       }
+
+       if (lapic_in_kernel(vcpu) &&
+           test_bit(KVM_APIC_SIPI, &apic->pending_events)) {
+               if (block_nested_events)
+                       return -EBUSY;
+
+               clear_bit(KVM_APIC_SIPI, &apic->pending_events);
+               if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
+                       nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0,
+                                               apic->sipi_vector & 0xFFUL);
                return 0;
        }
 
@@ -4036,6 +4060,8 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
        if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
                vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
+       else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
+               vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI;
        else
                vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
 
@@ -4814,7 +4840,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
        /*
         * The Intel VMX Instruction Reference lists a bunch of bits that are
         * prerequisite to running VMXON, most notably cr4.VMXE must be set to
-        * 1 (see vmx_set_cr4() for when we allow the guest to set this).
+        * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
         * Otherwise, we should fail with #UD.  But most faulting conditions
         * have already been checked by hardware, prior to the VM-exit for
         * VMXON.  We do test guest cr4.VMXE because processor CR4 always has
@@ -6483,7 +6509,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
        msrs->misc_low |=
                MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
                VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
-               VMX_MISC_ACTIVITY_HLT;
+               VMX_MISC_ACTIVITY_HLT |
+               VMX_MISC_ACTIVITY_WAIT_SIPI;
        msrs->misc_high = 0;
 
        /*
index 90ad7a6..e85aa5f 100644 (file)
@@ -132,7 +132,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
        mov (%_ASM_SP), %_ASM_AX
 
        /* Check if vmlaunch or vmresume is needed */
-       cmpb $0, %bl
+       testb %bl, %bl
 
        /* Load guest registers.  Don't clobber flags. */
        mov VCPU_RCX(%_ASM_AX), %_ASM_CX
index 47b8357..75c9c6a 100644 (file)
@@ -40,7 +40,6 @@
 #include <asm/irq_remapping.h>
 #include <asm/kexec.h>
 #include <asm/perf_event.h>
-#include <asm/mce.h>
 #include <asm/mmu_context.h>
 #include <asm/mshyperv.h>
 #include <asm/mwait.h>
@@ -1826,7 +1825,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+                   !guest_has_spec_ctrl_msr(vcpu))
                        return 1;
 
                msr_info->data = to_vmx(vcpu)->spec_ctrl;
@@ -2028,7 +2027,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+                   !guest_has_spec_ctrl_msr(vcpu))
                        return 1;
 
                if (kvm_spec_ctrl_test_value(data))
@@ -2063,12 +2062,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                goto find_uret_msr;
        case MSR_IA32_PRED_CMD:
                if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+                   !guest_has_pred_cmd_msr(vcpu))
                        return 1;
 
                if (data & ~PRED_CMD_IBPB)
                        return 1;
-               if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+               if (!boot_cpu_has(X86_FEATURE_IBPB))
                        return 1;
                if (!data)
                        break;
@@ -3095,8 +3094,25 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd,
                vmcs_writel(GUEST_CR3, guest_cr3);
 }
 
-int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
+       /*
+        * We operate under the default treatment of SMM, so VMX cannot be
+        * enabled under SMM.  Note, whether or not VMXE is allowed at all is
+        * handled by kvm_is_valid_cr4().
+        */
+       if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu))
+               return false;
+
+       if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
+               return false;
+
+       return true;
+}
+
+void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+       unsigned long old_cr4 = vcpu->arch.cr4;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        /*
         * Pass through host's Machine Check Enable value to hw_cr4, which
@@ -3123,21 +3139,6 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                }
        }
 
-       if (cr4 & X86_CR4_VMXE) {
-               /*
-                * To use VMXON (and later other VMX instructions), a guest
-                * must first be able to turn on cr4.VMXE (see handle_vmon()).
-                * So basically the check on whether to allow nested VMX
-                * is here.  We operate under the default treatment of SMM,
-                * so VMX cannot be enabled under SMM.
-                */
-               if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
-                       return 1;
-       }
-
-       if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
-               return 1;
-
        vcpu->arch.cr4 = cr4;
        kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
 
@@ -3168,7 +3169,9 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 
        vmcs_writel(CR4_READ_SHADOW, cr4);
        vmcs_writel(GUEST_CR4, hw_cr4);
-       return 0;
+
+       if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+               kvm_update_cpuid_runtime(vcpu);
 }
 
 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
@@ -3515,42 +3518,33 @@ bool __vmx_guest_state_valid(struct kvm_vcpu *vcpu)
        return true;
 }
 
-static int init_rmode_tss(struct kvm *kvm)
+static int init_rmode_tss(struct kvm *kvm, void __user *ua)
 {
-       gfn_t fn;
-       u16 data = 0;
-       int idx, r;
+       const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+       u16 data;
+       int i;
+
+       for (i = 0; i < 3; i++) {
+               if (__copy_to_user(ua + PAGE_SIZE * i, zero_page, PAGE_SIZE))
+                       return -EFAULT;
+       }
 
-       idx = srcu_read_lock(&kvm->srcu);
-       fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
-       r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
-       if (r < 0)
-               goto out;
        data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
-       r = kvm_write_guest_page(kvm, fn++, &data,
-                       TSS_IOPB_BASE_OFFSET, sizeof(u16));
-       if (r < 0)
-               goto out;
-       r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
-       if (r < 0)
-               goto out;
-       r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
-       if (r < 0)
-               goto out;
+       if (__copy_to_user(ua + TSS_IOPB_BASE_OFFSET, &data, sizeof(u16)))
+               return -EFAULT;
+
        data = ~0;
-       r = kvm_write_guest_page(kvm, fn, &data,
-                                RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
-                                sizeof(u8));
-out:
-       srcu_read_unlock(&kvm->srcu, idx);
-       return r;
+       if (__copy_to_user(ua + RMODE_TSS_SIZE - 1, &data, sizeof(u8)))
+               return -EFAULT;
+
+       return 0;
 }
 
 static int init_rmode_identity_map(struct kvm *kvm)
 {
        struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
        int i, r = 0;
-       kvm_pfn_t identity_map_pfn;
+       void __user *uaddr;
        u32 tmp;
 
        /* Protect kvm_vmx->ept_identity_pagetable_done. */
@@ -3561,24 +3555,24 @@ static int init_rmode_identity_map(struct kvm *kvm)
 
        if (!kvm_vmx->ept_identity_map_addr)
                kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
-       identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
 
-       r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
-                                   kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
-       if (r < 0)
+       uaddr = __x86_set_memory_region(kvm,
+                                       IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+                                       kvm_vmx->ept_identity_map_addr,
+                                       PAGE_SIZE);
+       if (IS_ERR(uaddr)) {
+               r = PTR_ERR(uaddr);
                goto out;
+       }
 
-       r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
-       if (r < 0)
-               goto out;
        /* Set up identity-mapping pagetable for EPT in real mode */
        for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
                tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
                        _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
-               r = kvm_write_guest_page(kvm, identity_map_pfn,
-                               &tmp, i * sizeof(tmp), sizeof(tmp));
-               if (r < 0)
+               if (__copy_to_user(uaddr + i * sizeof(tmp), &tmp, sizeof(tmp))) {
+                       r = -EFAULT;
                        goto out;
+               }
        }
        kvm_vmx->ept_identity_pagetable_done = true;
 
@@ -3605,19 +3599,22 @@ static void seg_setup(int seg)
 static int alloc_apic_access_page(struct kvm *kvm)
 {
        struct page *page;
-       int r = 0;
+       void __user *hva;
+       int ret = 0;
 
        mutex_lock(&kvm->slots_lock);
        if (kvm->arch.apic_access_page_done)
                goto out;
-       r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
-                                   APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
-       if (r)
+       hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+                                     APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
+       if (IS_ERR(hva)) {
+               ret = PTR_ERR(hva);
                goto out;
+       }
 
        page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
        if (is_error_page(page)) {
-               r = -EFAULT;
+               ret = -EFAULT;
                goto out;
        }
 
@@ -3629,7 +3626,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
        kvm->arch.apic_access_page_done = true;
 out:
        mutex_unlock(&kvm->slots_lock);
-       return r;
+       return ret;
 }
 
 int allocate_vpid(void)
@@ -4638,7 +4635,7 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
-       int ret;
+       void __user *ret;
 
        if (enable_unrestricted_guest)
                return 0;
@@ -4648,10 +4645,12 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
                                      PAGE_SIZE * 3);
        mutex_unlock(&kvm->slots_lock);
 
-       if (ret)
-               return ret;
+       if (IS_ERR(ret))
+               return PTR_ERR(ret);
+
        to_kvm_vmx(kvm)->tss_addr = addr;
-       return init_rmode_tss(kvm);
+
+       return init_rmode_tss(kvm, ret);
 }
 
 static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
@@ -4716,25 +4715,6 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
        return 1;
 }
 
-/*
- * Trigger machine check on the host. We assume all the MSRs are already set up
- * by the CPU and that we still run on the same CPU as the MCE occurred on.
- * We pass a fake environment to the machine check handler because we want
- * the guest to be always treated like user space, no matter what context
- * it used internally.
- */
-static void kvm_machine_check(void)
-{
-#if defined(CONFIG_X86_MCE)
-       struct pt_regs regs = {
-               .cs = 3, /* Fake ring 3 no matter what the guest ran on */
-               .flags = X86_EFLAGS_IF,
-       };
-
-       do_machine_check(&regs);
-#endif
-}
-
 static int handle_machine_check(struct kvm_vcpu *vcpu)
 {
        /* handled by vmx_vcpu_run() */
@@ -6399,7 +6379,11 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
                handle_exception_nmi_irqoff(vmx);
 }
 
-static bool vmx_has_emulated_msr(u32 index)
+/*
+ * The kvm parameter can be NULL (module initialization, or invocation before
+ * VM creation). Be sure to check the kvm parameter before using it.
+ */
+static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
 {
        switch (index) {
        case MSR_IA32_SMBASE:
@@ -7558,7 +7542,7 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
 
 static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
 {
-       return to_vmx(vcpu)->nested.vmxon;
+       return to_vmx(vcpu)->nested.vmxon && !is_guest_mode(vcpu);
 }
 
 static void vmx_migrate_timers(struct kvm_vcpu *vcpu)
@@ -7587,6 +7571,11 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit)
        return supported & BIT(bit);
 }
 
+static int vmx_cpu_dirty_log_size(void)
+{
+       return enable_pml ? PML_ENTITY_NUM : 0;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .hardware_unsetup = hardware_unsetup,
 
@@ -7616,6 +7605,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .get_cpl = vmx_get_cpl,
        .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
        .set_cr0 = vmx_set_cr0,
+       .is_valid_cr4 = vmx_is_valid_cr4,
        .set_cr4 = vmx_set_cr4,
        .set_efer = vmx_set_efer,
        .get_idt = vmx_get_idt,
@@ -7715,6 +7705,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .migrate_timers = vmx_migrate_timers,
 
        .msr_filter_changed = vmx_msr_filter_changed,
+       .complete_emulated_msr = kvm_complete_insn_gp,
+       .cpu_dirty_log_size = vmx_cpu_dirty_log_size,
 };
 
 static __init int hardware_setup(void)
@@ -7832,6 +7824,7 @@ static __init int hardware_setup(void)
                vmx_x86_ops.slot_disable_log_dirty = NULL;
                vmx_x86_ops.flush_log_dirty = NULL;
                vmx_x86_ops.enable_log_dirty_pt_masked = NULL;
+               vmx_x86_ops.cpu_dirty_log_size = NULL;
        }
 
        if (!cpu_has_vmx_preemption_timer())
index f6f66e5..9d3a557 100644 (file)
@@ -321,7 +321,7 @@ u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu);
 void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask);
 int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer);
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx);
 void ept_save_pdptrs(struct kvm_vcpu *vcpu);
 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
index e545a8a..3f7c1fc 100644 (file)
@@ -197,7 +197,8 @@ EXPORT_SYMBOL_GPL(host_efer);
 bool __read_mostly allow_smaller_maxphyaddr = 0;
 EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
 
-static u64 __read_mostly host_xss;
+u64 __read_mostly host_xss;
+EXPORT_SYMBOL_GPL(host_xss);
 u64 __read_mostly supported_xss;
 EXPORT_SYMBOL_GPL(supported_xss);
 
@@ -804,11 +805,29 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(pdptrs_changed);
 
+void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
+{
+       unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
+
+       if ((cr0 ^ old_cr0) & X86_CR0_PG) {
+               kvm_clear_async_pf_completion_queue(vcpu);
+               kvm_async_pf_hash_reset(vcpu);
+       }
+
+       if ((cr0 ^ old_cr0) & update_bits)
+               kvm_mmu_reset_context(vcpu);
+
+       if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
+           kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
+           !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+               kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
+}
+EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
+
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        unsigned long old_cr0 = kvm_read_cr0(vcpu);
        unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
-       unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
 
        cr0 |= X86_CR0_ET;
 
@@ -847,18 +866,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
        kvm_x86_ops.set_cr0(vcpu, cr0);
 
-       if ((cr0 ^ old_cr0) & X86_CR0_PG) {
-               kvm_clear_async_pf_completion_queue(vcpu);
-               kvm_async_pf_hash_reset(vcpu);
-       }
-
-       if ((cr0 ^ old_cr0) & update_bits)
-               kvm_mmu_reset_context(vcpu);
-
-       if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
-           kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
-           !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
-               kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
+       kvm_post_set_cr0(vcpu, old_cr0, cr0);
 
        return 0;
 }
@@ -872,6 +880,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw);
 
 void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
 {
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
 
                if (vcpu->arch.xcr0 != host_xcr0)
@@ -892,6 +903,9 @@ EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
 
 void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
 {
+       if (vcpu->arch.guest_state_protected)
+               return;
+
        if (static_cpu_has(X86_FEATURE_PKU) &&
            (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
             (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
@@ -964,26 +978,36 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 }
 EXPORT_SYMBOL_GPL(kvm_set_xcr);
 
-int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        if (cr4 & cr4_reserved_bits)
-               return -EINVAL;
+               return false;
 
        if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
-               return -EINVAL;
+               return false;
 
-       return 0;
+       return kvm_x86_ops.is_valid_cr4(vcpu, cr4);
+}
+EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
+
+void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
+{
+       unsigned long mmu_role_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+                                     X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
+
+       if (((cr4 ^ old_cr4) & mmu_role_bits) ||
+           (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
+               kvm_mmu_reset_context(vcpu);
 }
-EXPORT_SYMBOL_GPL(kvm_valid_cr4);
+EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
 
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        unsigned long old_cr4 = kvm_read_cr4(vcpu);
        unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
                                   X86_CR4_SMEP;
-       unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
 
-       if (kvm_valid_cr4(vcpu, cr4))
+       if (!kvm_is_valid_cr4(vcpu, cr4))
                return 1;
 
        if (is_long_mode(vcpu)) {
@@ -1006,15 +1030,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                        return 1;
        }
 
-       if (kvm_x86_ops.set_cr4(vcpu, cr4))
-               return 1;
+       kvm_x86_ops.set_cr4(vcpu, cr4);
 
-       if (((cr4 ^ old_cr4) & mmu_role_bits) ||
-           (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
-               kvm_mmu_reset_context(vcpu);
-
-       if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
-               kvm_update_cpuid_runtime(vcpu);
+       kvm_post_set_cr4(vcpu, old_cr4, cr4);
 
        return 0;
 }
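
Factoring kvm_post_set_cr4() out of kvm_set_cr4() lets a path that writes CR4 directly (e.g. an emulator restore path) reuse the side-effect handling instead of open-coding the MMU reset. A minimal sketch of such a caller, illustrative rather than the patch's code:

static void restore_cr4_sketch(struct kvm_vcpu *vcpu, unsigned long new_cr4)
{
        unsigned long old_cr4 = kvm_read_cr4(vcpu);

        kvm_x86_ops.set_cr4(vcpu, new_cr4);     /* now returns void */
        kvm_post_set_cr4(vcpu, old_cr4, new_cr4);
}
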
@@ -1638,27 +1656,20 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_msr);
 
-static int complete_emulated_msr(struct kvm_vcpu *vcpu, bool is_read)
+static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
 {
-       if (vcpu->run->msr.error) {
-               kvm_inject_gp(vcpu, 0);
-               return 1;
-       } else if (is_read) {
+       int err = vcpu->run->msr.error;
+       if (!err) {
                kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
                kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
        }
 
-       return kvm_skip_emulated_instruction(vcpu);
-}
-
-static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
-{
-       return complete_emulated_msr(vcpu, true);
+       return kvm_x86_ops.complete_emulated_msr(vcpu, err);
 }
 
 static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
 {
-       return complete_emulated_msr(vcpu, false);
+       return kvm_x86_ops.complete_emulated_msr(vcpu, vcpu->run->msr.error);
 }
 
 static u64 kvm_msr_reason(int r)
@@ -1721,18 +1732,16 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
                return 0;
        }
 
-       /* MSR read failed? Inject a #GP */
-       if (r) {
+       if (!r) {
+               trace_kvm_msr_read(ecx, data);
+
+               kvm_rax_write(vcpu, data & -1u);
+               kvm_rdx_write(vcpu, (data >> 32) & -1u);
+       } else {
                trace_kvm_msr_read_ex(ecx);
-               kvm_inject_gp(vcpu, 0);
-               return 1;
        }
 
-       trace_kvm_msr_read(ecx, data);
-
-       kvm_rax_write(vcpu, data & -1u);
-       kvm_rdx_write(vcpu, (data >> 32) & -1u);
-       return kvm_skip_emulated_instruction(vcpu);
+       return kvm_x86_ops.complete_emulated_msr(vcpu, r);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
 
@@ -1753,15 +1762,12 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
        if (r < 0)
                return r;
 
-       /* MSR write failed? Inject a #GP */
-       if (r > 0) {
+       if (!r)
+               trace_kvm_msr_write(ecx, data);
+       else
                trace_kvm_msr_write_ex(ecx, data);
-               kvm_inject_gp(vcpu, 0);
-               return 1;
-       }
 
-       trace_kvm_msr_write(ecx, data);
-       return kvm_skip_emulated_instruction(vcpu);
+       return kvm_x86_ops.complete_emulated_msr(vcpu, r);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
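
The new .complete_emulated_msr callback exists because a failed MSR access in an SEV-ES guest cannot be completed with an ordinary injected #GP. The SVM implementation is wired up earlier in this series but its body is not shown here; a plausible sketch, based on the GHCB exit_info convention, would be:

static int svm_complete_emulated_msr_sketch(struct kvm_vcpu *vcpu, int err)
{
        struct vcpu_svm *svm = to_svm(vcpu);

        if (!sev_es_guest(vcpu->kvm) || !err)
                return kvm_complete_insn_gp(vcpu, err);

        /* Report the failure to the guest through the GHCB */
        ghcb_set_sw_exit_info_1(svm->ghcb, 1);
        ghcb_set_sw_exit_info_2(svm->ghcb,
                                X86_TRAP_GP |
                                SVM_EVTINJ_TYPE_EXEPT |
                                SVM_EVTINJ_VALID);
        return 1;
}
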
 
@@ -3678,6 +3684,27 @@ static inline bool kvm_can_mwait_in_guest(void)
                boot_cpu_has(X86_FEATURE_ARAT);
 }
 
+static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
+                                           struct kvm_cpuid2 __user *cpuid_arg)
+{
+       struct kvm_cpuid2 cpuid;
+       int r;
+
+       r = -EFAULT;
+       if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
+               return r;
+
+       r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries);
+       if (r)
+               return r;
+
+       r = -EFAULT;
+       if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
+               return r;
+
+       return 0;
+}
+
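
With KVM_CAP_SYS_HYPERV_CPUID, the same ioctl can now be issued on the /dev/kvm fd before any vCPU exists. A minimal userspace sketch, assuming the caller has pre-set cpuid->nent to the number of allocated entries:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_sys_hv_cpuid(struct kvm_cpuid2 *cpuid)
{
        int ret, kvm_fd = open("/dev/kvm", O_RDWR);

        if (kvm_fd < 0)
                return -1;

        ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
        close(kvm_fd);
        return ret;
}
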
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
        int r = 0;
@@ -3714,6 +3741,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_HYPERV_TLBFLUSH:
        case KVM_CAP_HYPERV_SEND_IPI:
        case KVM_CAP_HYPERV_CPUID:
+       case KVM_CAP_SYS_HYPERV_CPUID:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
        case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3762,7 +3790,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                 * fringe case that is not enabled except via specific settings
                 * of the module parameters.
                 */
-               r = kvm_x86_ops.has_emulated_msr(MSR_IA32_SMBASE);
+               r = kvm_x86_ops.has_emulated_msr(kvm, MSR_IA32_SMBASE);
                break;
        case KVM_CAP_VAPIC:
                r = !kvm_x86_ops.cpu_has_accelerated_tpr();
@@ -3899,6 +3927,9 @@ long kvm_arch_dev_ioctl(struct file *filp,
        case KVM_GET_MSRS:
                r = msr_io(NULL, argp, do_get_msr_feature, 1);
                break;
+       case KVM_GET_SUPPORTED_HV_CPUID:
+               r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
+               break;
        default:
                r = -EINVAL;
                break;
@@ -3997,7 +4028,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
        int idx;
 
-       if (vcpu->preempted)
+       if (vcpu->preempted && !vcpu->arch.guest_state_protected)
                vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
 
        /*
@@ -4481,6 +4512,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                                         struct kvm_xsave *guest_xsave)
 {
+       if (!vcpu->arch.guest_fpu)
+               return;
+
        if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                memset(guest_xsave, 0, sizeof(struct kvm_xsave));
                fill_xsave((u8 *) guest_xsave->region, vcpu);
@@ -4498,9 +4532,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
                                        struct kvm_xsave *guest_xsave)
 {
-       u64 xstate_bv =
-               *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
-       u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
+       u64 xstate_bv;
+       u32 mxcsr;
+
+       if (!vcpu->arch.guest_fpu)
+               return 0;
+
+       xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
+       mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
 
        if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                /*
@@ -4977,25 +5016,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        }
-       case KVM_GET_SUPPORTED_HV_CPUID: {
-               struct kvm_cpuid2 __user *cpuid_arg = argp;
-               struct kvm_cpuid2 cpuid;
-
-               r = -EFAULT;
-               if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
-                       goto out;
-
-               r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
-                                               cpuid_arg->entries);
-               if (r)
-                       goto out;
-
-               r = -EFAULT;
-               if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
-                       goto out;
-               r = 0;
+       case KVM_GET_SUPPORTED_HV_CPUID:
+               r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
                break;
-       }
        default:
                r = -EINVAL;
        }
@@ -5776,7 +5799,7 @@ static void kvm_init_msr_list(void)
        }
 
        for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
-               if (!kvm_x86_ops.has_emulated_msr(emulated_msrs_all[i]))
+               if (!kvm_x86_ops.has_emulated_msr(NULL, emulated_msrs_all[i]))
                        continue;
 
                emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
@@ -8158,7 +8181,14 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *kvm_run = vcpu->run;
 
-       kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+       /*
+        * if_flag is obsolete and useless, so do not bother
+        * setting it for SEV-ES guests.  Userspace can just
+        * use kvm_run->ready_for_interrupt_injection.
+        */
+       kvm_run->if_flag = !vcpu->arch.guest_state_protected
+               && (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+
        kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
@@ -8748,6 +8778,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        bool req_immediate_exit = false;
 
+       /* Forbid VM entry if the vCPU dirty ring is soft-full */
+       if (unlikely(vcpu->kvm->dirty_ring_size &&
+                    kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
+               vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
+               trace_kvm_dirty_ring_exit(vcpu);
+               r = 0;
+               goto out;
+       }
+
        if (kvm_request_pending(vcpu)) {
                if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
                        if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
@@ -9223,9 +9262,14 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
        kvm_save_current_fpu(vcpu->arch.user_fpu);
 
-       /* PKRU is separately restored in kvm_x86_ops.run.  */
-       __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
-                               ~XFEATURE_MASK_PKRU);
+       /*
+        * Guests with protected state can't have it set by the hypervisor,
+        * so skip trying to set it.
+        */
+       if (vcpu->arch.guest_fpu)
+               /* PKRU is separately restored in kvm_x86_ops.run. */
+               __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
+                                       ~XFEATURE_MASK_PKRU);
 
        fpregs_mark_activate();
        fpregs_unlock();
@@ -9238,7 +9282,12 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
        fpregs_lock();
 
-       kvm_save_current_fpu(vcpu->arch.guest_fpu);
+       /*
+        * Guests with protected state can't have it read by the hypervisor,
+        * so skip trying to save it.
+        */
+       if (vcpu->arch.guest_fpu)
+               kvm_save_current_fpu(vcpu->arch.guest_fpu);
 
        copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
 
@@ -9417,6 +9466,9 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
        struct desc_ptr dt;
 
+       if (vcpu->arch.guest_state_protected)
+               goto skip_protected_regs;
+
        kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
        kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -9434,9 +9486,11 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        sregs->gdt.limit = dt.size;
        sregs->gdt.base = dt.address;
 
-       sregs->cr0 = kvm_read_cr0(vcpu);
        sregs->cr2 = vcpu->arch.cr2;
        sregs->cr3 = kvm_read_cr3(vcpu);
+
+skip_protected_regs:
+       sregs->cr0 = kvm_read_cr0(vcpu);
        sregs->cr4 = kvm_read_cr4(vcpu);
        sregs->cr8 = kvm_get_cr8(vcpu);
        sregs->efer = vcpu->arch.efer;
@@ -9535,7 +9589,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
-static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
        if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
                /*
@@ -9543,31 +9597,29 @@ static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
                 * 64-bit mode (though maybe in a 32-bit code segment).
                 * CR4.PAE and EFER.LMA must be set.
                 */
-               if (!(sregs->cr4 & X86_CR4_PAE)
-                   || !(sregs->efer & EFER_LMA))
-                       return -EINVAL;
+               if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
+                       return false;
        } else {
                /*
                 * Not in 64-bit mode: EFER.LMA is clear and the code
                 * segment cannot be 64-bit.
                 */
                if (sregs->efer & EFER_LMA || sregs->cs.l)
-                       return -EINVAL;
+                       return false;
        }
 
-       return kvm_valid_cr4(vcpu, sregs->cr4);
+       return kvm_is_valid_cr4(vcpu, sregs->cr4);
 }
 
 static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
        struct msr_data apic_base_msr;
        int mmu_reset_needed = 0;
-       int cpuid_update_needed = 0;
        int pending_vec, max_bits, idx;
        struct desc_ptr dt;
        int ret = -EINVAL;
 
-       if (kvm_valid_sregs(vcpu, sregs))
+       if (!kvm_is_valid_sregs(vcpu, sregs))
                goto out;
 
        apic_base_msr.data = sregs->apic_base;
@@ -9575,6 +9627,9 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        if (kvm_set_apic_base(vcpu, &apic_base_msr))
                goto out;
 
+       if (vcpu->arch.guest_state_protected)
+               goto skip_protected_regs;
+
        dt.size = sregs->idt.limit;
        dt.address = sregs->idt.base;
        kvm_x86_ops.set_idt(vcpu, &dt);
@@ -9597,11 +9652,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        vcpu->arch.cr0 = sregs->cr0;
 
        mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
-       cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
-                               (X86_CR4_OSXSAVE | X86_CR4_PKE));
        kvm_x86_ops.set_cr4(vcpu, sregs->cr4);
-       if (cpuid_update_needed)
-               kvm_update_cpuid_runtime(vcpu);
 
        idx = srcu_read_lock(&vcpu->kvm->srcu);
        if (is_pae_paging(vcpu)) {
@@ -9613,14 +9664,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        if (mmu_reset_needed)
                kvm_mmu_reset_context(vcpu);
 
-       max_bits = KVM_NR_INTERRUPTS;
-       pending_vec = find_first_bit(
-               (const unsigned long *)sregs->interrupt_bitmap, max_bits);
-       if (pending_vec < max_bits) {
-               kvm_queue_interrupt(vcpu, pending_vec, false);
-               pr_debug("Set back pending irq %d\n", pending_vec);
-       }
-
        kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
        kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -9639,6 +9682,15 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
            !is_protmode(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
+skip_protected_regs:
+       max_bits = KVM_NR_INTERRUPTS;
+       pending_vec = find_first_bit(
+               (const unsigned long *)sregs->interrupt_bitmap, max_bits);
+       if (pending_vec < max_bits) {
+               kvm_queue_interrupt(vcpu, pending_vec, false);
+               pr_debug("Set back pending irq %d\n", pending_vec);
+       }
+
        kvm_make_request(KVM_REQ_EVENT, vcpu);
 
        ret = 0;
@@ -9663,6 +9715,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
        unsigned long rflags;
        int i, r;
 
+       if (vcpu->arch.guest_state_protected)
+               return -EINVAL;
+
        vcpu_load(vcpu);
 
        if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
@@ -9742,6 +9797,9 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        struct fxregs_state *fxsave;
 
+       if (!vcpu->arch.guest_fpu)
+               return 0;
+
        vcpu_load(vcpu);
 
        fxsave = &vcpu->arch.guest_fpu->state.fxsave;
@@ -9762,6 +9820,9 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        struct fxregs_state *fxsave;
 
+       if (!vcpu->arch.guest_fpu)
+               return 0;
+
        vcpu_load(vcpu);
 
        fxsave = &vcpu->arch.guest_fpu->state.fxsave;
@@ -9820,6 +9881,9 @@ static int sync_regs(struct kvm_vcpu *vcpu)
 
 static void fx_init(struct kvm_vcpu *vcpu)
 {
+       if (!vcpu->arch.guest_fpu)
+               return;
+
        fpstate_init(&vcpu->arch.guest_fpu->state);
        if (boot_cpu_has(X86_FEATURE_XSAVES))
                vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
@@ -9833,6 +9897,15 @@ static void fx_init(struct kvm_vcpu *vcpu)
        vcpu->arch.cr0 |= X86_CR0_ET;
 }
 
+void kvm_free_guest_fpu(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->arch.guest_fpu) {
+               kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
+               vcpu->arch.guest_fpu = NULL;
+       }
+}
+EXPORT_SYMBOL_GPL(kvm_free_guest_fpu);
+
 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 {
        if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
@@ -9869,7 +9942,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
        r = -ENOMEM;
 
-       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+       page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        if (!page)
                goto fail_free_lapic;
        vcpu->arch.pio_data = page_address(page);
@@ -9928,7 +10001,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        return 0;
 
 free_guest_fpu:
-       kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
+       kvm_free_guest_fpu(vcpu);
 free_user_fpu:
        kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
 free_emulate_ctxt:
@@ -9982,7 +10055,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
        kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
        free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
        kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
-       kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
+       kvm_free_guest_fpu(vcpu);
 
        kvm_hv_vcpu_uninit(vcpu);
        kvm_pmu_destroy(vcpu);
@@ -10030,7 +10103,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        kvm_async_pf_hash_reset(vcpu);
        vcpu->arch.apf.halted = false;
 
-       if (kvm_mpx_supported()) {
+       if (vcpu->arch.guest_fpu && kvm_mpx_supported()) {
                void *mpx_state_buffer;
 
                /*
@@ -10349,7 +10422,32 @@ void kvm_arch_sync_events(struct kvm *kvm)
        kvm_free_pit(kvm);
 }
 
-int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
+#define  ERR_PTR_USR(e)  ((void __user *)ERR_PTR(e))
+
+/**
+ * __x86_set_memory_region: Setup KVM internal memory slot
+ *
+ * @kvm: pointer to the VM's kvm structure.
+ * @id: the slot ID to setup.
+ * @gpa: the GPA to install the slot (unused when @size == 0).
+ * @size: the size of the slot. Set to zero to uninstall a slot.
+ *
+ * This function sets up a KVM internal memory slot.  Specify
+ * @size > 0 to install a new slot, or @size == 0 to uninstall a
+ * slot.  The return value is one of the following:
+ *
+ *   HVA:           on success (uninstall will return a bogus HVA)
+ *   -errno:        on error
+ *
+ * The caller should always use IS_ERR() to check the return value
+ * before use.  Note, the KVM internal memory slots are guaranteed to
+ * remain valid and unchanged until the VM is destroyed, i.e., the
+ * GPA->HVA translation will not change.  However, the HVA is a user
+ * address, i.e. its accessibility is not guaranteed, and must be
+ * accessed via __copy_{to,from}_user().
+ */
+void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
+                                     u32 size)
 {
        int i, r;
        unsigned long hva, old_npages;
@@ -10358,12 +10456,12 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 
        /* Called with kvm->slots_lock held.  */
        if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
-               return -EINVAL;
+               return ERR_PTR_USR(-EINVAL);
 
        slot = id_to_memslot(slots, id);
        if (size) {
                if (slot && slot->npages)
-                       return -EEXIST;
+                       return ERR_PTR_USR(-EEXIST);
 
                /*
                 * MAP_SHARED to prevent internal slot pages from being moved
@@ -10372,7 +10470,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
                hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
                              MAP_SHARED | MAP_ANONYMOUS, 0);
                if (IS_ERR((void *)hva))
-                       return PTR_ERR((void *)hva);
+                       return (void __user *)hva;
        } else {
                if (!slot || !slot->npages)
                        return 0;
@@ -10391,13 +10489,13 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
                m.memory_size = size;
                r = __kvm_set_memory_region(kvm, &m);
                if (r < 0)
-                       return r;
+                       return ERR_PTR_USR(r);
        }
 
        if (!size)
                vm_munmap(hva, old_npages * PAGE_SIZE);
 
-       return 0;
+       return (void __user *)hva;
 }
 EXPORT_SYMBOL_GPL(__x86_set_memory_region);
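
Callers now receive a user HVA instead of an errno and must pair IS_ERR() with __copy_to_user(), as the init_rmode_tss() conversion above does. A condensed sketch of the pattern; the slot ID and payload here are illustrative:

static int install_and_zero_slot_sketch(struct kvm *kvm, gpa_t gpa)
{
        u8 zero = 0;
        void __user *hva;

        hva = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
                                      gpa, PAGE_SIZE);
        if (IS_ERR(hva))
                return PTR_ERR(hva);

        if (__copy_to_user(hva, &zero, sizeof(zero)))
                return -EFAULT;

        return 0;
}
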
 
@@ -10754,6 +10852,10 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
 {
+       /* Can't read the RIP when guest state is protected, just return 0 */
+       if (vcpu->arch.guest_state_protected)
+               return 0;
+
        if (is_64_bit_mode(vcpu))
                return kvm_rip_read(vcpu);
        return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
@@ -11263,6 +11365,179 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 }
 EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
 
+static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_mmio_fragment *frag;
+       unsigned int len;
+
+       BUG_ON(!vcpu->mmio_needed);
+
+       /* Complete previous fragment */
+       frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
+       len = min(8u, frag->len);
+       if (!vcpu->mmio_is_write)
+               memcpy(frag->data, run->mmio.data, len);
+
+       if (frag->len <= 8) {
+               /* Switch to the next fragment. */
+               frag++;
+               vcpu->mmio_cur_fragment++;
+       } else {
+               /* Go forward to the next mmio piece. */
+               frag->data += len;
+               frag->gpa += len;
+               frag->len -= len;
+       }
+
+       if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
+               vcpu->mmio_needed = 0;
+
+               /*
+                * VMG change: at this point we're always done, as the
+                * RIP has already been advanced.
+                */
+               return 1;
+       }
+
+       /* More MMIO is needed */
+       run->mmio.phys_addr = frag->gpa;
+       run->mmio.len = min(8u, frag->len);
+       run->mmio.is_write = vcpu->mmio_is_write;
+       if (run->mmio.is_write)
+               memcpy(run->mmio.data, frag->data, min(8u, frag->len));
+       run->exit_reason = KVM_EXIT_MMIO;
+
+       vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
+
+       return 0;
+}
+
+int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
+                         void *data)
+{
+       int handled;
+       struct kvm_mmio_fragment *frag;
+
+       if (!data)
+               return -EINVAL;
+
+       handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data);
+       if (handled == bytes)
+               return 1;
+
+       bytes -= handled;
+       gpa += handled;
+       data += handled;
+
+       /* TODO: Check whether the number of frags needs to be incremented */
+       frag = vcpu->mmio_fragments;
+       vcpu->mmio_nr_fragments = 1;
+       frag->len = bytes;
+       frag->gpa = gpa;
+       frag->data = data;
+
+       vcpu->mmio_needed = 1;
+       vcpu->mmio_cur_fragment = 0;
+
+       vcpu->run->mmio.phys_addr = gpa;
+       vcpu->run->mmio.len = min(8u, frag->len);
+       vcpu->run->mmio.is_write = 1;
+       memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
+       vcpu->run->exit_reason = KVM_EXIT_MMIO;
+
+       vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write);
+
+int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
+                        void *data)
+{
+       int handled;
+       struct kvm_mmio_fragment *frag;
+
+       if (!data)
+               return -EINVAL;
+
+       handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data);
+       if (handled == bytes)
+               return 1;
+
+       bytes -= handled;
+       gpa += handled;
+       data += handled;
+
+       /* TODO: Check whether the number of frags needs to be incremented */
+       frag = vcpu->mmio_fragments;
+       vcpu->mmio_nr_fragments = 1;
+       frag->len = bytes;
+       frag->gpa = gpa;
+       frag->data = data;
+
+       vcpu->mmio_needed = 1;
+       vcpu->mmio_cur_fragment = 0;
+
+       vcpu->run->mmio.phys_addr = gpa;
+       vcpu->run->mmio.len = min(8u, frag->len);
+       vcpu->run->mmio.is_write = 0;
+       vcpu->run->exit_reason = KVM_EXIT_MMIO;
+
+       vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
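
A hedged sketch of how vendor code might hand a decoded SEV-ES MMIO access to these helpers; the real caller is the SVM VMGEXIT path, and the scratch-buffer name is an assumption:

    /* Illustrative only: forward an 8-byte MMIO read whose destination
     * is a shared (unencrypted) scratch buffer. */
    static int example_forward_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa,
                                         void *scratch)
    {
            /* 1: handled in-kernel; 0: userspace completes the exit via
             * complete_sev_es_emulated_mmio(); <0: bad input. */
            return kvm_sev_es_mmio_read(vcpu, gpa, 8, scratch);
    }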
+
+static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+{
+       memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data,
+              vcpu->arch.pio.count * vcpu->arch.pio.size);
+       vcpu->arch.pio.count = 0;
+
+       return 1;
+}
+
+static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
+                          unsigned int port, void *data,  unsigned int count)
+{
+       int ret;
+
+       ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port,
+                                       data, count);
+       if (ret)
+               return ret;
+
+       vcpu->arch.pio.count = 0;
+
+       return 0;
+}
+
+static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
+                         unsigned int port, void *data, unsigned int count)
+{
+       int ret;
+
+       ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port,
+                                      data, count);
+       if (ret) {
+               vcpu->arch.pio.count = 0;
+       } else {
+               vcpu->arch.guest_ins_data = data;
+               vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
+       }
+
+       return 0;
+}
+
+int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
+                        unsigned int port, void *data,  unsigned int count,
+                        int in)
+{
+       return in ? kvm_sev_es_ins(vcpu, size, port, data, count)
+                 : kvm_sev_es_outs(vcpu, size, port, data, count);
+}
+EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
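
Likewise for string I/O; port, size and count would be decoded from the GHCB exit information (the values here are assumptions):

    /* Illustrative only: a rep OUTSB of count bytes to port 0x3f8. */
    static int example_forward_string_out(struct kvm_vcpu *vcpu,
                                          void *scratch, unsigned int count)
    {
            return kvm_sev_es_string_io(vcpu, 1, 0x3f8, scratch, count, 0);
    }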
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
@@ -11285,3 +11560,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
index e7ca622..c5ee0f5 100644 (file)
@@ -3,6 +3,7 @@
 #define ARCH_X86_KVM_X86_H
 
 #include <linux/kvm_host.h>
+#include <asm/mce.h>
 #include <asm/pvclock.h>
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
@@ -278,6 +279,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
 
 extern u64 host_xcr0;
 extern u64 supported_xcr0;
+extern u64 host_xss;
 extern u64 supported_xss;
 
 static inline bool kvm_mpx_supported(void)
@@ -366,10 +368,29 @@ static inline bool kvm_dr6_valid(u64 data)
        return !(data >> 32);
 }
 
+/*
+ * Trigger machine check on the host. We assume all the MSRs are already set up
+ * by the CPU and that we still run on the same CPU as the MCE occurred on.
+ * We pass a fake environment to the machine check handler because we want
+ * the guest to be always treated like user space, no matter what context
+ * it used internally.
+ */
+static inline void kvm_machine_check(void)
+{
+#if defined(CONFIG_X86_MCE)
+       struct pt_regs regs = {
+               .cs = 3, /* Fake ring 3 no matter what the guest ran on */
+               .flags = X86_EFLAGS_IF,
+       };
+
+       do_machine_check(&regs);
+#endif
+}
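
As a hedged illustration of the intended call site, a VMX exit handler would forward a machine check roughly like this (the surrounding handler is an assumption):

    /* Illustrative: an MCE observed at VM-entry is handed to the host
     * #MC handler, which sees the fake ring-3 pt_regs built above. */
    static int example_handle_mce_exit(struct kvm_vcpu *vcpu, u32 exit_reason)
    {
            if (exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
                    kvm_machine_check();
            return 1;
    }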
+
 void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
 void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
 int kvm_spec_ctrl_test_value(u64 value);
-int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu);
 int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
                              struct x86_exception *e);
@@ -407,4 +428,12 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
        __reserved_bits;                                \
 })
 
+int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes,
+                         void *dst);
+int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes,
+                        void *dst);
+int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
+                        unsigned int port, void *data,  unsigned int count,
+                        int in);
+
 #endif
index 218acbd..afc1da6 100644 (file)
@@ -26,6 +26,19 @@ config XEN_PV
        help
          Support running as a Xen PV guest.
 
+config XEN_512GB
+       bool "Limit Xen pv-domain memory to 512GB"
+       depends on XEN_PV
+       default y
+       help
+         Limit paravirtualized user domains to 512GB of RAM.
+
+         The Xen tools and crash dump analysis tools might not support
+         pv-domains with more than 512 GB of RAM. This option controls
+         whether the kernel limits itself to at most 512 GB by default.
+         The default can always be overridden via the boot parameter
+         "xen_512gb_limit".
+
 config XEN_PV_SMP
        def_bool y
        depends on XEN_PV && SMP
@@ -39,28 +52,19 @@ config XEN_DOM0
          Support running as a Xen PV Dom0 guest.
 
 config XEN_PVHVM
-       bool "Xen PVHVM guest support"
-       default y
-       depends on XEN && PCI && X86_LOCAL_APIC
-       help
-         Support running as a Xen PVHVM guest.
+       def_bool y
+       depends on XEN && X86_LOCAL_APIC
 
 config XEN_PVHVM_SMP
        def_bool y
        depends on XEN_PVHVM && SMP
 
-config XEN_512GB
-       bool "Limit Xen pv-domain memory to 512GB"
-       depends on XEN_PV
+config XEN_PVHVM_GUEST
+       bool "Xen PVHVM guest support"
        default y
+       depends on XEN_PVHVM && PCI
        help
-         Limit paravirtualized user domains to 512GB of RAM.
-
-         The Xen tools and crash dump analysis tools might not support
-         pv-domains with more than 512 GB of RAM. This option controls the
-         default setting of the kernel to use only up to 512 GB or more.
-         It is always possible to change the default via specifying the
-         boot parameter "xen_512gb_limit".
+         Support running as a Xen PVHVM guest.
 
 config XEN_SAVE_RESTORE
        bool
@@ -76,7 +80,9 @@ config XEN_DEBUG_FS
          Enabling this option may incur a significant performance overhead.
 
 config XEN_PVH
-       bool "Support for running as a Xen PVH guest"
+       bool "Xen PVH guest support"
        depends on XEN && XEN_PVHVM && ACPI
        select PVH
        def_bool n
+       help
+         Support for running as a Xen PVH guest.
index be4151f..3301875 100644 (file)
@@ -795,17 +795,7 @@ static int p2m_dump_show(struct seq_file *m, void *v)
        return 0;
 }
 
-static int p2m_dump_open(struct inode *inode, struct file *filp)
-{
-       return single_open(filp, p2m_dump_show, NULL);
-}
-
-static const struct file_operations p2m_dump_fops = {
-       .open           = p2m_dump_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(p2m_dump);
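
For reference, DEFINE_SHOW_ATTRIBUTE(p2m_dump) expands to roughly the boilerplate it replaces here (a sketch of the macro in include/linux/seq_file.h):

    static int p2m_dump_open(struct inode *inode, struct file *file)
    {
            return single_open(file, p2m_dump_show, inode->i_private);
    }

    static const struct file_operations p2m_dump_fops = {
            .owner          = THIS_MODULE,
            .open           = p2m_dump_open,
            .read           = seq_read,
            .llseek         = seq_lseek,
            .release        = single_release,
    };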
 
 static struct dentry *d_mmu_debug;
 
index b070f27..46116a2 100644 (file)
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
+441    common  epoll_pwait2                    sys_epoll_pwait2
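
epoll_pwait2 behaves like epoll_pwait but takes a struct timespec timeout for nanosecond resolution. A hedged userspace sketch, invoking the syscall by number since a libc wrapper may not exist yet:

    #include <stdio.h>
    #include <sys/epoll.h>
    #include <sys/syscall.h>
    #include <time.h>
    #include <unistd.h>

    int main(void)
    {
            struct epoll_event events[8];
            struct timespec ts = { .tv_sec = 0, .tv_nsec = 250 * 1000 * 1000 };
            int epfd = epoll_create1(0);

            /* 441 matches the x86-64 table entry added above. */
            int n = syscall(441, epfd, events, 8, &ts, NULL, 0);

            printf("epoll_pwait2 returned %d\n", n);
            return 0;
    }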
index 77971fe..e8ceb15 100644 (file)
@@ -13,7 +13,7 @@
  */
 
 #include <linux/clk.h>
-#include <linux/clk-provider.h>
+#include <linux/of_clk.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/time.h>
index 44e4125..076894a 100644 (file)
@@ -54,6 +54,7 @@ acpi-y                                += property.o
 acpi-$(CONFIG_X86)             += acpi_cmos_rtc.o
 acpi-$(CONFIG_X86)             += x86/apple.o
 acpi-$(CONFIG_X86)             += x86/utils.o
+acpi-$(CONFIG_X86)             += x86/s2idle.o
 acpi-$(CONFIG_DEBUG_FS)                += debugfs.o
 acpi-y                         += acpi_lpat.o
 acpi-$(CONFIG_ACPI_LPIT)       += acpi_lpit.o
index 4ed755a..8f2dc17 100644 (file)
@@ -319,6 +319,9 @@ static bool matching_id(const char *idstr, const char *list_id)
 {
        int i;
 
+       if (strlen(idstr) != strlen(list_id))
+               return false;
+
        if (memcmp(idstr, list_id, 3))
                return false;
 
index a852dc4..75aaf94 100644 (file)
@@ -414,109 +414,88 @@ end:
        return result;
 }
 
+bool acpi_cpc_valid(void)
+{
+       struct cpc_desc *cpc_ptr;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
+               if (!cpc_ptr)
+                       return false;
+       }
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(acpi_cpc_valid);
+
 /**
- * acpi_get_psd_map - Map the CPUs in a common freq domain.
- * @all_cpu_data: Ptrs to CPU specific CPPC data including PSD info.
+ * acpi_get_psd_map - Map the CPUs in the freq domain of a given cpu
+ * @cpu: Find all CPUs that share a domain with cpu.
+ * @cpu_data: Pointer to CPU specific CPPC data including PSD info.
  *
  *     Return: 0 for success or negative value for err.
  */
-int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data)
+int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data)
 {
-       int count_target;
-       int retval = 0;
-       unsigned int i, j;
-       cpumask_var_t covered_cpus;
-       struct cppc_cpudata *pr, *match_pr;
-       struct acpi_psd_package *pdomain;
-       struct acpi_psd_package *match_pdomain;
        struct cpc_desc *cpc_ptr, *match_cpc_ptr;
-
-       if (!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))
-               return -ENOMEM;
+       struct acpi_psd_package *match_pdomain;
+       struct acpi_psd_package *pdomain;
+       int count_target, i;
 
        /*
         * Now that we have _PSD data from all CPUs, let's setup P-state
         * domain info.
         */
-       for_each_possible_cpu(i) {
-               if (cpumask_test_cpu(i, covered_cpus))
-                       continue;
-
-               pr = all_cpu_data[i];
-               cpc_ptr = per_cpu(cpc_desc_ptr, i);
-               if (!cpc_ptr) {
-                       retval = -EFAULT;
-                       goto err_ret;
-               }
+       cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
+       if (!cpc_ptr)
+               return -EFAULT;
 
-               pdomain = &(cpc_ptr->domain_info);
-               cpumask_set_cpu(i, pr->shared_cpu_map);
-               cpumask_set_cpu(i, covered_cpus);
-               if (pdomain->num_processors <= 1)
-                       continue;
+       pdomain = &(cpc_ptr->domain_info);
+       cpumask_set_cpu(cpu, cpu_data->shared_cpu_map);
+       if (pdomain->num_processors <= 1)
+               return 0;
 
-               /* Validate the Domain info */
-               count_target = pdomain->num_processors;
-               if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL)
-                       pr->shared_type = CPUFREQ_SHARED_TYPE_ALL;
-               else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL)
-                       pr->shared_type = CPUFREQ_SHARED_TYPE_HW;
-               else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY)
-                       pr->shared_type = CPUFREQ_SHARED_TYPE_ANY;
-
-               for_each_possible_cpu(j) {
-                       if (i == j)
-                               continue;
-
-                       match_cpc_ptr = per_cpu(cpc_desc_ptr, j);
-                       if (!match_cpc_ptr) {
-                               retval = -EFAULT;
-                               goto err_ret;
-                       }
+       /* Validate the Domain info */
+       count_target = pdomain->num_processors;
+       if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL)
+               cpu_data->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+       else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL)
+               cpu_data->shared_type = CPUFREQ_SHARED_TYPE_HW;
+       else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY)
+               cpu_data->shared_type = CPUFREQ_SHARED_TYPE_ANY;
 
-                       match_pdomain = &(match_cpc_ptr->domain_info);
-                       if (match_pdomain->domain != pdomain->domain)
-                               continue;
+       for_each_possible_cpu(i) {
+               if (i == cpu)
+                       continue;
 
-                       /* Here i and j are in the same domain */
-                       if (match_pdomain->num_processors != count_target) {
-                               retval = -EFAULT;
-                               goto err_ret;
-                       }
+               match_cpc_ptr = per_cpu(cpc_desc_ptr, i);
+               if (!match_cpc_ptr)
+                       goto err_fault;
 
-                       if (pdomain->coord_type != match_pdomain->coord_type) {
-                               retval = -EFAULT;
-                               goto err_ret;
-                       }
+               match_pdomain = &(match_cpc_ptr->domain_info);
+               if (match_pdomain->domain != pdomain->domain)
+                       continue;
 
-                       cpumask_set_cpu(j, covered_cpus);
-                       cpumask_set_cpu(j, pr->shared_cpu_map);
-               }
+               /* Here i and cpu are in the same domain */
+               if (match_pdomain->num_processors != count_target)
+                       goto err_fault;
 
-               for_each_cpu(j, pr->shared_cpu_map) {
-                       if (i == j)
-                               continue;
+               if (pdomain->coord_type != match_pdomain->coord_type)
+                       goto err_fault;
 
-                       match_pr = all_cpu_data[j];
-                       match_pr->shared_type = pr->shared_type;
-                       cpumask_copy(match_pr->shared_cpu_map,
-                                    pr->shared_cpu_map);
-               }
+               cpumask_set_cpu(i, cpu_data->shared_cpu_map);
        }
-       goto out;
 
-err_ret:
-       for_each_possible_cpu(i) {
-               pr = all_cpu_data[i];
+       return 0;
 
-               /* Assume no coordination on any error parsing domain info */
-               cpumask_clear(pr->shared_cpu_map);
-               cpumask_set_cpu(i, pr->shared_cpu_map);
-               pr->shared_type = CPUFREQ_SHARED_TYPE_ALL;
-       }
-out:
-       free_cpumask_var(covered_cpus);
-       return retval;
+err_fault:
+       /* Assume no coordination on any error parsing domain info */
+       cpumask_clear(cpu_data->shared_cpu_map);
+       cpumask_set_cpu(cpu, cpu_data->shared_cpu_map);
+       cpu_data->shared_type = CPUFREQ_SHARED_TYPE_NONE;
+
+       return -EFAULT;
 }
 EXPORT_SYMBOL_GPL(acpi_get_psd_map);
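
A sketch of how a cpufreq driver's ->init() path might use the reworked per-CPU interface (the function name and allocation policy are assumptions):

    static int example_init_psd(unsigned int cpu)
    {
            struct cppc_cpudata *cpu_data;
            int ret;

            if (!acpi_cpc_valid())
                    return -ENODEV; /* some possible CPU lacks _CPC data */

            cpu_data = kzalloc(sizeof(*cpu_data), GFP_KERNEL);
            if (!cpu_data)
                    return -ENOMEM;
            if (!zalloc_cpumask_var(&cpu_data->shared_cpu_map, GFP_KERNEL)) {
                    kfree(cpu_data);
                    return -ENOMEM;
            }

            /* Fills shared_cpu_map with every CPU in this cpu's _PSD domain. */
            ret = acpi_get_psd_map(cpu, cpu_data);
            if (ret) {
                    free_cpumask_var(cpu_data->shared_cpu_map);
                    kfree(cpu_data);
            }
            return ret;
    }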
 
index 0dcedd6..32f0f55 100644 (file)
@@ -708,7 +708,7 @@ err_ret:
                if (retval) {
                        cpumask_clear(pr->performance->shared_cpu_map);
                        cpumask_set_cpu(i, pr->performance->shared_cpu_map);
-                       pr->performance->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+                       pr->performance->shared_type = CPUFREQ_SHARED_TYPE_NONE;
                }
                pr->performance = NULL; /* Will be set for real in register */
        }
index a1b226e..80b668c 100644 (file)
@@ -752,6 +752,7 @@ static bool acpi_info_matches_ids(struct acpi_device_info *info,
 /* List of HIDs for which we ignore matching ACPI devices, when checking _DEP lists. */
 static const char * const acpi_ignore_dep_ids[] = {
        "PNP0D80", /* Windows-compatible System Power Management Controller */
+       "INT33BD", /* Intel Baytrail Mailbox Device */
        NULL
 };
 
@@ -1635,8 +1636,6 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
        device_initialize(&device->dev);
        dev_set_uevent_suppress(&device->dev, true);
        acpi_init_coherency(device);
-       /* Assume there are unmet deps until acpi_device_dep_initialize() runs */
-       device->dep_unmet = 1;
 }
 
 void acpi_device_add_finalize(struct acpi_device *device)
@@ -1842,32 +1841,36 @@ static void acpi_scan_init_hotplug(struct acpi_device *adev)
        }
 }
 
-static void acpi_device_dep_initialize(struct acpi_device *adev)
+static u32 acpi_scan_check_dep(acpi_handle handle)
 {
-       struct acpi_dep_data *dep;
        struct acpi_handle_list dep_devices;
        acpi_status status;
+       u32 count;
        int i;
 
-       adev->dep_unmet = 0;
-
-       if (!acpi_has_method(adev->handle, "_DEP"))
-               return;
+       /*
+        * Check for _HID here to avoid deferring the enumeration of:
+        * 1. PCI devices.
+        * 2. ACPI nodes describing USB ports.
+        * Still, checking for _HID catches more than just these cases ...
+        */
+       if (!acpi_has_method(handle, "_DEP") || !acpi_has_method(handle, "_HID"))
+               return 0;
 
-       status = acpi_evaluate_reference(adev->handle, "_DEP", NULL,
-                                       &dep_devices);
+       status = acpi_evaluate_reference(handle, "_DEP", NULL, &dep_devices);
        if (ACPI_FAILURE(status)) {
-               dev_dbg(&adev->dev, "Failed to evaluate _DEP.\n");
-               return;
+               acpi_handle_debug(handle, "Failed to evaluate _DEP.\n");
+               return 0;
        }
 
-       for (i = 0; i < dep_devices.count; i++) {
+       for (count = 0, i = 0; i < dep_devices.count; i++) {
                struct acpi_device_info *info;
-               int skip;
+               struct acpi_dep_data *dep;
+               bool skip;
 
                status = acpi_get_object_info(dep_devices.handles[i], &info);
                if (ACPI_FAILURE(status)) {
-                       dev_dbg(&adev->dev, "Error reading _DEP device info\n");
+                       acpi_handle_debug(handle, "Error reading _DEP device info\n");
                        continue;
                }
 
@@ -1877,26 +1880,45 @@ static void acpi_device_dep_initialize(struct acpi_device *adev)
                if (skip)
                        continue;
 
-               dep = kzalloc(sizeof(struct acpi_dep_data), GFP_KERNEL);
+               dep = kzalloc(sizeof(*dep), GFP_KERNEL);
                if (!dep)
-                       return;
+                       continue;
+
+               count++;
 
                dep->supplier = dep_devices.handles[i];
-               dep->consumer  = adev->handle;
-               adev->dep_unmet++;
+               dep->consumer = handle;
 
                mutex_lock(&acpi_dep_list_lock);
                list_add_tail(&dep->node , &acpi_dep_list);
                mutex_unlock(&acpi_dep_list_lock);
        }
+
+       return count;
 }
 
-static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used,
-                                     void *not_used, void **return_value)
+static void acpi_scan_dep_init(struct acpi_device *adev)
+{
+       struct acpi_dep_data *dep;
+
+       mutex_lock(&acpi_dep_list_lock);
+
+       list_for_each_entry(dep, &acpi_dep_list, node) {
+               if (dep->consumer == adev->handle)
+                       adev->dep_unmet++;
+       }
+
+       mutex_unlock(&acpi_dep_list_lock);
+}
+
+static bool acpi_bus_scan_second_pass;
+
+static acpi_status acpi_bus_check_add(acpi_handle handle, bool check_dep,
+                                     struct acpi_device **adev_p)
 {
        struct acpi_device *device = NULL;
-       int type;
        unsigned long long sta;
+       int type;
        int result;
 
        acpi_bus_get_device(handle, &device);
@@ -1912,20 +1934,42 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used,
                return AE_OK;
        }
 
+       if (type == ACPI_BUS_TYPE_DEVICE && check_dep) {
+               u32 count = acpi_scan_check_dep(handle);
+               /* Bail out if the number of recorded dependencies is not 0. */
+               if (count > 0) {
+                       acpi_bus_scan_second_pass = true;
+                       return AE_CTRL_DEPTH;
+               }
+       }
+
        acpi_add_single_object(&device, handle, type, sta);
        if (!device)
                return AE_CTRL_DEPTH;
 
        acpi_scan_init_hotplug(device);
-       acpi_device_dep_initialize(device);
+       if (!check_dep)
+               acpi_scan_dep_init(device);
 
- out:
-       if (!*return_value)
-               *return_value = device;
+out:
+       if (!*adev_p)
+               *adev_p = device;
 
        return AE_OK;
 }
 
+static acpi_status acpi_bus_check_add_1(acpi_handle handle, u32 lvl_not_used,
+                                       void *not_used, void **ret_p)
+{
+       return acpi_bus_check_add(handle, true, (struct acpi_device **)ret_p);
+}
+
+static acpi_status acpi_bus_check_add_2(acpi_handle handle, u32 lvl_not_used,
+                                       void *not_used, void **ret_p)
+{
+       return acpi_bus_check_add(handle, false, (struct acpi_device **)ret_p);
+}
+
 static void acpi_default_enumeration(struct acpi_device *device)
 {
        /*
@@ -1993,12 +2037,16 @@ static int acpi_scan_attach_handler(struct acpi_device *device)
        return ret;
 }
 
-static void acpi_bus_attach(struct acpi_device *device)
+static void acpi_bus_attach(struct acpi_device *device, bool first_pass)
 {
        struct acpi_device *child;
+       bool skip = !first_pass && device->flags.visited;
        acpi_handle ejd;
        int ret;
 
+       if (skip)
+               goto ok;
+
        if (ACPI_SUCCESS(acpi_bus_get_ejd(device->handle, &ejd)))
                register_dock_dependent_device(device, ejd);
 
@@ -2045,9 +2093,9 @@ static void acpi_bus_attach(struct acpi_device *device)
 
  ok:
        list_for_each_entry(child, &device->children, node)
-               acpi_bus_attach(child);
+               acpi_bus_attach(child, first_pass);
 
-       if (device->handler && device->handler->hotplug.notify_online)
+       if (!skip && device->handler && device->handler->hotplug.notify_online)
                device->handler->hotplug.notify_online(device);
 }
 
@@ -2065,7 +2113,8 @@ void acpi_walk_dep_device_list(acpi_handle handle)
 
                        adev->dep_unmet--;
                        if (!adev->dep_unmet)
-                               acpi_bus_attach(adev);
+                               acpi_bus_attach(adev, true);
+
                        list_del(&dep->node);
                        kfree(dep);
                }
@@ -2090,17 +2139,37 @@ EXPORT_SYMBOL_GPL(acpi_walk_dep_device_list);
  */
 int acpi_bus_scan(acpi_handle handle)
 {
-       void *device = NULL;
+       struct acpi_device *device = NULL;
+
+       acpi_bus_scan_second_pass = false;
 
-       if (ACPI_SUCCESS(acpi_bus_check_add(handle, 0, NULL, &device)))
+       /* Pass 1: Avoid enumerating devices with missing dependencies. */
+
+       if (ACPI_SUCCESS(acpi_bus_check_add(handle, true, &device)))
                acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX,
-                                   acpi_bus_check_add, NULL, NULL, &device);
+                                   acpi_bus_check_add_1, NULL, NULL,
+                                   (void **)&device);
+
+       if (!device)
+               return -ENODEV;
 
-       if (device) {
-               acpi_bus_attach(device);
+       acpi_bus_attach(device, true);
+
+       if (!acpi_bus_scan_second_pass)
                return 0;
-       }
-       return -ENODEV;
+
+       /* Pass 2: Enumerate all of the remaining devices. */
+
+       device = NULL;
+
+       if (ACPI_SUCCESS(acpi_bus_check_add(handle, false, &device)))
+               acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX,
+                                   acpi_bus_check_add_2, NULL, NULL,
+                                   (void **)&device);
+
+       acpi_bus_attach(device, false);
+
+       return 0;
 }
 EXPORT_SYMBOL(acpi_bus_scan);
 
index aff13bf..09fd137 100644 (file)
@@ -92,10 +92,6 @@ bool acpi_sleep_state_supported(u8 sleep_state)
 }
 
 #ifdef CONFIG_ACPI_SLEEP
-static bool sleep_no_lps0 __read_mostly;
-module_param(sleep_no_lps0, bool, 0644);
-MODULE_PARM_DESC(sleep_no_lps0, "Do not use the special LPS0 device interface");
-
 static u32 acpi_target_sleep_state = ACPI_STATE_S0;
 
 u32 acpi_target_system_state(void)
@@ -165,7 +161,7 @@ static int __init init_nvs_nosave(const struct dmi_system_id *d)
        return 0;
 }
 
-static bool acpi_sleep_default_s3;
+bool acpi_sleep_default_s3;
 
 static int __init init_default_s3(const struct dmi_system_id *d)
 {
@@ -688,268 +684,13 @@ static const struct platform_suspend_ops acpi_suspend_ops_old = {
 
 static bool s2idle_wakeup;
 
-/*
- * On platforms supporting the Low Power S0 Idle interface there is an ACPI
- * device object with the PNP0D80 compatible device ID (System Power Management
- * Controller) and a specific _DSM method under it.  That method, if present,
- * can be used to indicate to the platform that the OS is transitioning into a
- * low-power state in which certain types of activity are not desirable or that
- * it is leaving such a state, which allows the platform to adjust its operation
- * mode accordingly.
- */
-static const struct acpi_device_id lps0_device_ids[] = {
-       {"PNP0D80", },
-       {"", },
-};
-
-#define ACPI_LPS0_DSM_UUID     "c4eb40a0-6cd2-11e2-bcfd-0800200c9a66"
-
-#define ACPI_LPS0_GET_DEVICE_CONSTRAINTS       1
-#define ACPI_LPS0_SCREEN_OFF   3
-#define ACPI_LPS0_SCREEN_ON    4
-#define ACPI_LPS0_ENTRY                5
-#define ACPI_LPS0_EXIT         6
-
-static acpi_handle lps0_device_handle;
-static guid_t lps0_dsm_guid;
-static char lps0_dsm_func_mask;
-
-/* Device constraint entry structure */
-struct lpi_device_info {
-       char *name;
-       int enabled;
-       union acpi_object *package;
-};
-
-/* Constraint package structure */
-struct lpi_device_constraint {
-       int uid;
-       int min_dstate;
-       int function_states;
-};
-
-struct lpi_constraints {
-       acpi_handle handle;
-       int min_dstate;
-};
-
-static struct lpi_constraints *lpi_constraints_table;
-static int lpi_constraints_table_size;
-
-static void lpi_device_get_constraints(void)
-{
-       union acpi_object *out_obj;
-       int i;
-
-       out_obj = acpi_evaluate_dsm_typed(lps0_device_handle, &lps0_dsm_guid,
-                                         1, ACPI_LPS0_GET_DEVICE_CONSTRAINTS,
-                                         NULL, ACPI_TYPE_PACKAGE);
-
-       acpi_handle_debug(lps0_device_handle, "_DSM function 1 eval %s\n",
-                         out_obj ? "successful" : "failed");
-
-       if (!out_obj)
-               return;
-
-       lpi_constraints_table = kcalloc(out_obj->package.count,
-                                       sizeof(*lpi_constraints_table),
-                                       GFP_KERNEL);
-       if (!lpi_constraints_table)
-               goto free_acpi_buffer;
-
-       acpi_handle_debug(lps0_device_handle, "LPI: constraints list begin:\n");
-
-       for (i = 0; i < out_obj->package.count; i++) {
-               struct lpi_constraints *constraint;
-               acpi_status status;
-               union acpi_object *package = &out_obj->package.elements[i];
-               struct lpi_device_info info = { };
-               int package_count = 0, j;
-
-               if (!package)
-                       continue;
-
-               for (j = 0; j < package->package.count; ++j) {
-                       union acpi_object *element =
-                                       &(package->package.elements[j]);
-
-                       switch (element->type) {
-                       case ACPI_TYPE_INTEGER:
-                               info.enabled = element->integer.value;
-                               break;
-                       case ACPI_TYPE_STRING:
-                               info.name = element->string.pointer;
-                               break;
-                       case ACPI_TYPE_PACKAGE:
-                               package_count = element->package.count;
-                               info.package = element->package.elements;
-                               break;
-                       }
-               }
-
-               if (!info.enabled || !info.package || !info.name)
-                       continue;
-
-               constraint = &lpi_constraints_table[lpi_constraints_table_size];
-
-               status = acpi_get_handle(NULL, info.name, &constraint->handle);
-               if (ACPI_FAILURE(status))
-                       continue;
-
-               acpi_handle_debug(lps0_device_handle,
-                                 "index:%d Name:%s\n", i, info.name);
-
-               constraint->min_dstate = -1;
-
-               for (j = 0; j < package_count; ++j) {
-                       union acpi_object *info_obj = &info.package[j];
-                       union acpi_object *cnstr_pkg;
-                       union acpi_object *obj;
-                       struct lpi_device_constraint dev_info;
-
-                       switch (info_obj->type) {
-                       case ACPI_TYPE_INTEGER:
-                               /* version */
-                               break;
-                       case ACPI_TYPE_PACKAGE:
-                               if (info_obj->package.count < 2)
-                                       break;
-
-                               cnstr_pkg = info_obj->package.elements;
-                               obj = &cnstr_pkg[0];
-                               dev_info.uid = obj->integer.value;
-                               obj = &cnstr_pkg[1];
-                               dev_info.min_dstate = obj->integer.value;
-
-                               acpi_handle_debug(lps0_device_handle,
-                                       "uid:%d min_dstate:%s\n",
-                                       dev_info.uid,
-                                       acpi_power_state_string(dev_info.min_dstate));
-
-                               constraint->min_dstate = dev_info.min_dstate;
-                               break;
-                       }
-               }
-
-               if (constraint->min_dstate < 0) {
-                       acpi_handle_debug(lps0_device_handle,
-                                         "Incomplete constraint defined\n");
-                       continue;
-               }
-
-               lpi_constraints_table_size++;
-       }
-
-       acpi_handle_debug(lps0_device_handle, "LPI: constraints list end\n");
-
-free_acpi_buffer:
-       ACPI_FREE(out_obj);
-}
-
-static void lpi_check_constraints(void)
-{
-       int i;
-
-       for (i = 0; i < lpi_constraints_table_size; ++i) {
-               acpi_handle handle = lpi_constraints_table[i].handle;
-               struct acpi_device *adev;
-
-               if (!handle || acpi_bus_get_device(handle, &adev))
-                       continue;
-
-               acpi_handle_debug(handle,
-                       "LPI: required min power state:%s current power state:%s\n",
-                       acpi_power_state_string(lpi_constraints_table[i].min_dstate),
-                       acpi_power_state_string(adev->power.state));
-
-               if (!adev->flags.power_manageable) {
-                       acpi_handle_info(handle, "LPI: Device not power manageable\n");
-                       lpi_constraints_table[i].handle = NULL;
-                       continue;
-               }
-
-               if (adev->power.state < lpi_constraints_table[i].min_dstate)
-                       acpi_handle_info(handle,
-                               "LPI: Constraint not met; min power state:%s current power state:%s\n",
-                               acpi_power_state_string(lpi_constraints_table[i].min_dstate),
-                               acpi_power_state_string(adev->power.state));
-       }
-}
-
-static void acpi_sleep_run_lps0_dsm(unsigned int func)
-{
-       union acpi_object *out_obj;
-
-       if (!(lps0_dsm_func_mask & (1 << func)))
-               return;
-
-       out_obj = acpi_evaluate_dsm(lps0_device_handle, &lps0_dsm_guid, 1, func, NULL);
-       ACPI_FREE(out_obj);
-
-       acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n",
-                         func, out_obj ? "successful" : "failed");
-}
-
-static int lps0_device_attach(struct acpi_device *adev,
-                             const struct acpi_device_id *not_used)
-{
-       union acpi_object *out_obj;
-
-       if (lps0_device_handle)
-               return 0;
-
-       if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0))
-               return 0;
-
-       guid_parse(ACPI_LPS0_DSM_UUID, &lps0_dsm_guid);
-       /* Check if the _DSM is present and as expected. */
-       out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 1, 0, NULL);
-       if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER) {
-               acpi_handle_debug(adev->handle,
-                                 "_DSM function 0 evaluation failed\n");
-               return 0;
-       }
-
-       lps0_dsm_func_mask = *(char *)out_obj->buffer.pointer;
-
-       ACPI_FREE(out_obj);
-
-       acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n",
-                         lps0_dsm_func_mask);
-
-       lps0_device_handle = adev->handle;
-
-       lpi_device_get_constraints();
-
-       /*
-        * Use suspend-to-idle by default if the default suspend mode was not
-        * set from the command line.
-        */
-       if (mem_sleep_default > PM_SUSPEND_MEM && !acpi_sleep_default_s3)
-               mem_sleep_current = PM_SUSPEND_TO_IDLE;
-
-       /*
-        * Some LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U, require the
-        * EC GPE to be enabled while suspended for certain wakeup devices to
-        * work, so mark it as wakeup-capable.
-        */
-       acpi_ec_mark_gpe_for_wake();
-
-       return 0;
-}
-
-static struct acpi_scan_handler lps0_handler = {
-       .ids = lps0_device_ids,
-       .attach = lps0_device_attach,
-};
-
-static int acpi_s2idle_begin(void)
+int acpi_s2idle_begin(void)
 {
        acpi_scan_lock_acquire();
        return 0;
 }
 
-static int acpi_s2idle_prepare(void)
+int acpi_s2idle_prepare(void)
 {
        if (acpi_sci_irq_valid()) {
                enable_irq_wake(acpi_sci_irq);
@@ -966,21 +707,7 @@ static int acpi_s2idle_prepare(void)
        return 0;
 }
 
-static int acpi_s2idle_prepare_late(void)
-{
-       if (!lps0_device_handle || sleep_no_lps0)
-               return 0;
-
-       if (pm_debug_messages_on)
-               lpi_check_constraints();
-
-       acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF);
-       acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY);
-
-       return 0;
-}
-
-static bool acpi_s2idle_wake(void)
+bool acpi_s2idle_wake(void)
 {
        if (!acpi_sci_irq_valid())
                return pm_wakeup_pending();
@@ -1046,16 +773,7 @@ static bool acpi_s2idle_wake(void)
        return false;
 }
 
-static void acpi_s2idle_restore_early(void)
-{
-       if (!lps0_device_handle || sleep_no_lps0)
-               return;
-
-       acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT);
-       acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON);
-}
-
-static void acpi_s2idle_restore(void)
+void acpi_s2idle_restore(void)
 {
        /*
         * Drain pending events before restoring the working-state configuration
@@ -1077,7 +795,7 @@ static void acpi_s2idle_restore(void)
        }
 }
 
-static void acpi_s2idle_end(void)
+void acpi_s2idle_end(void)
 {
        acpi_scan_lock_release();
 }
@@ -1085,13 +803,16 @@ static void acpi_s2idle_end(void)
 static const struct platform_s2idle_ops acpi_s2idle_ops = {
        .begin = acpi_s2idle_begin,
        .prepare = acpi_s2idle_prepare,
-       .prepare_late = acpi_s2idle_prepare_late,
        .wake = acpi_s2idle_wake,
-       .restore_early = acpi_s2idle_restore_early,
        .restore = acpi_s2idle_restore,
        .end = acpi_s2idle_end,
 };
 
+void __weak acpi_s2idle_setup(void)
+{
+       s2idle_set_ops(&acpi_s2idle_ops);
+}
+
 static void acpi_sleep_suspend_setup(void)
 {
        int i;
@@ -1103,13 +824,11 @@ static void acpi_sleep_suspend_setup(void)
        suspend_set_ops(old_suspend_ordering ?
                &acpi_suspend_ops_old : &acpi_suspend_ops);
 
-       acpi_scan_add_handler(&lps0_handler);
-       s2idle_set_ops(&acpi_s2idle_ops);
+       acpi_s2idle_setup();
 }
 
 #else /* !CONFIG_SUSPEND */
 #define s2idle_wakeup          (false)
-#define lps0_device_handle     (NULL)
 static inline void acpi_sleep_suspend_setup(void) {}
 #endif /* !CONFIG_SUSPEND */
 
index 3d90480..1856f76 100644 (file)
@@ -15,3 +15,19 @@ static inline acpi_status acpi_set_waking_vector(u32 wakeup_address)
        return acpi_set_firmware_waking_vector(
                                (acpi_physical_address)wakeup_address, 0);
 }
+
+extern int acpi_s2idle_begin(void);
+extern int acpi_s2idle_prepare(void);
+extern int acpi_s2idle_prepare_late(void);
+extern bool acpi_s2idle_wake(void);
+extern void acpi_s2idle_restore_early(void);
+extern void acpi_s2idle_restore(void);
+extern void acpi_s2idle_end(void);
+
+extern void acpi_s2idle_setup(void);
+
+#ifdef CONFIG_ACPI_SLEEP
+extern bool acpi_sleep_default_s3;
+#else
+#define acpi_sleep_default_s3  (1)
+#endif
diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
new file mode 100644 (file)
index 0000000..25fea34
--- /dev/null
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Architecture-specific ACPI-based support for suspend-to-idle.
+ *
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ * Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+ * Author: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+ *
+ * On platforms supporting the Low Power S0 Idle interface there is an ACPI
+ * device object with the PNP0D80 compatible device ID (System Power Management
+ * Controller) and a specific _DSM method under it.  That method, if present,
+ * can be used to indicate to the platform that the OS is transitioning into a
+ * low-power state in which certain types of activity are not desirable or that
+ * it is leaving such a state, which allows the platform to adjust its operation
+ * mode accordingly.
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/suspend.h>
+
+#include "../sleep.h"
+
+#ifdef CONFIG_SUSPEND
+
+static bool sleep_no_lps0 __read_mostly;
+module_param(sleep_no_lps0, bool, 0644);
+MODULE_PARM_DESC(sleep_no_lps0, "Do not use the special LPS0 device interface");
+
+static const struct acpi_device_id lps0_device_ids[] = {
+       {"PNP0D80", },
+       {"", },
+};
+
+#define ACPI_LPS0_DSM_UUID     "c4eb40a0-6cd2-11e2-bcfd-0800200c9a66"
+
+#define ACPI_LPS0_GET_DEVICE_CONSTRAINTS       1
+#define ACPI_LPS0_SCREEN_OFF   3
+#define ACPI_LPS0_SCREEN_ON    4
+#define ACPI_LPS0_ENTRY                5
+#define ACPI_LPS0_EXIT         6
+
+/* AMD */
+#define ACPI_LPS0_DSM_UUID_AMD      "e3f32452-febc-43ce-9039-932122d37721"
+#define ACPI_LPS0_SCREEN_OFF_AMD    4
+#define ACPI_LPS0_SCREEN_ON_AMD     5
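
These function indices are the arguments to LPS0 _DSM evaluations; schematically (mirroring acpi_sleep_run_lps0_dsm() later in this file, with the AMD revision 0 as an assumption):

    /* Schematic: request the screen-off low-power transition on an
     * AMD platform; handle and GUID are resolved at attach time. */
    union acpi_object *out = acpi_evaluate_dsm(lps0_device_handle,
                                               &lps0_dsm_guid, 0,
                                               ACPI_LPS0_SCREEN_OFF_AMD, NULL);
    ACPI_FREE(out);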
+
+static acpi_handle lps0_device_handle;
+static guid_t lps0_dsm_guid;
+static char lps0_dsm_func_mask;
+
+/* Device constraint entry structure */
+struct lpi_device_info {
+       char *name;
+       int enabled;
+       union acpi_object *package;
+};
+
+/* Constraint package structure */
+struct lpi_device_constraint {
+       int uid;
+       int min_dstate;
+       int function_states;
+};
+
+struct lpi_constraints {
+       acpi_handle handle;
+       int min_dstate;
+};
+
+/* AMD */
+/* Device constraint entry structure */
+struct lpi_device_info_amd {
+       int revision;
+       int count;
+       union acpi_object *package;
+};
+
+/* Constraint package structure */
+struct lpi_device_constraint_amd {
+       char *name;
+       int enabled;
+       int function_states;
+       int min_dstate;
+};
+
+static struct lpi_constraints *lpi_constraints_table;
+static int lpi_constraints_table_size;
+static int rev_id;
+
+static void lpi_device_get_constraints_amd(void)
+{
+       union acpi_object *out_obj;
+       int i, j, k;
+
+       out_obj = acpi_evaluate_dsm_typed(lps0_device_handle, &lps0_dsm_guid,
+                                         1, ACPI_LPS0_GET_DEVICE_CONSTRAINTS,
+                                         NULL, ACPI_TYPE_PACKAGE);
+
+       acpi_handle_debug(lps0_device_handle, "_DSM function 1 eval %s\n",
+                         out_obj ? "successful" : "failed");
+
+       if (!out_obj)
+               return;
+
+       for (i = 0; i < out_obj->package.count; i++) {
+               union acpi_object *package = &out_obj->package.elements[i];
+               struct lpi_device_info_amd info = { };
+
+               if (package->type == ACPI_TYPE_INTEGER) {
+                       switch (i) {
+                       case 0:
+                               info.revision = package->integer.value;
+                               break;
+                       case 1:
+                               info.count = package->integer.value;
+                               break;
+                       }
+               } else if (package->type == ACPI_TYPE_PACKAGE) {
+                       lpi_constraints_table = kcalloc(package->package.count,
+                                                       sizeof(*lpi_constraints_table),
+                                                       GFP_KERNEL);
+
+                       if (!lpi_constraints_table)
+                               goto free_acpi_buffer;
+
+                       acpi_handle_debug(lps0_device_handle,
+                                         "LPI: constraints list begin:\n");
+
+                       for (j = 0; j < package->package.count; ++j) {
+                               union acpi_object *info_obj = &package->package.elements[j];
+                               struct lpi_device_constraint_amd dev_info = {};
+                               struct lpi_constraints *list;
+                               acpi_status status;
+
+                               for (k = 0; k < info_obj->package.count; ++k) {
+                                       union acpi_object *obj = &info_obj->package.elements[k];
+
+                                       list = &lpi_constraints_table[lpi_constraints_table_size];
+                                       list->min_dstate = -1;
+
+                                       switch (k) {
+                                       case 0:
+                                               dev_info.enabled = obj->integer.value;
+                                               break;
+                                       case 1:
+                                               dev_info.name = obj->string.pointer;
+                                               break;
+                                       case 2:
+                                               dev_info.function_states = obj->integer.value;
+                                               break;
+                                       case 3:
+                                               dev_info.min_dstate = obj->integer.value;
+                                               break;
+                                       }
+
+                                       if (!dev_info.enabled || !dev_info.name ||
+                                           !dev_info.min_dstate)
+                                               continue;
+
+                                       status = acpi_get_handle(NULL, dev_info.name,
+                                                                &list->handle);
+                                       if (ACPI_FAILURE(status))
+                                               continue;
+
+                                       acpi_handle_debug(lps0_device_handle,
+                                                         "Name:%s\n", dev_info.name);
+
+                                       list->min_dstate = dev_info.min_dstate;
+
+                                       if (list->min_dstate < 0) {
+                                               acpi_handle_debug(lps0_device_handle,
+                                                                 "Incomplete constraint defined\n");
+                                               continue;
+                                       }
+                               }
+                               lpi_constraints_table_size++;
+                       }
+               }
+       }
+
+       acpi_handle_debug(lps0_device_handle, "LPI: constraints list end\n");
+
+free_acpi_buffer:
+       ACPI_FREE(out_obj);
+}
+
+static void lpi_device_get_constraints(void)
+{
+       union acpi_object *out_obj;
+       int i;
+
+       out_obj = acpi_evaluate_dsm_typed(lps0_device_handle, &lps0_dsm_guid,
+                                         1, ACPI_LPS0_GET_DEVICE_CONSTRAINTS,
+                                         NULL, ACPI_TYPE_PACKAGE);
+
+       acpi_handle_debug(lps0_device_handle, "_DSM function 1 eval %s\n",
+                         out_obj ? "successful" : "failed");
+
+       if (!out_obj)
+               return;
+
+       lpi_constraints_table = kcalloc(out_obj->package.count,
+                                       sizeof(*lpi_constraints_table),
+                                       GFP_KERNEL);
+       if (!lpi_constraints_table)
+               goto free_acpi_buffer;
+
+       acpi_handle_debug(lps0_device_handle, "LPI: constraints list begin:\n");
+
+       for (i = 0; i < out_obj->package.count; i++) {
+               struct lpi_constraints *constraint;
+               acpi_status status;
+               union acpi_object *package = &out_obj->package.elements[i];
+               struct lpi_device_info info = { };
+               int package_count = 0, j;
+
+               if (!package)
+                       continue;
+
+               for (j = 0; j < package->package.count; ++j) {
+                       union acpi_object *element =
+                                       &(package->package.elements[j]);
+
+                       switch (element->type) {
+                       case ACPI_TYPE_INTEGER:
+                               info.enabled = element->integer.value;
+                               break;
+                       case ACPI_TYPE_STRING:
+                               info.name = element->string.pointer;
+                               break;
+                       case ACPI_TYPE_PACKAGE:
+                               package_count = element->package.count;
+                               info.package = element->package.elements;
+                               break;
+                       }
+               }
+
+               if (!info.enabled || !info.package || !info.name)
+                       continue;
+
+               constraint = &lpi_constraints_table[lpi_constraints_table_size];
+
+               status = acpi_get_handle(NULL, info.name, &constraint->handle);
+               if (ACPI_FAILURE(status))
+                       continue;
+
+               acpi_handle_debug(lps0_device_handle,
+                                 "index:%d Name:%s\n", i, info.name);
+
+               constraint->min_dstate = -1;
+
+               for (j = 0; j < package_count; ++j) {
+                       union acpi_object *info_obj = &info.package[j];
+                       union acpi_object *cnstr_pkg;
+                       union acpi_object *obj;
+                       struct lpi_device_constraint dev_info;
+
+                       switch (info_obj->type) {
+                       case ACPI_TYPE_INTEGER:
+                               /* version */
+                               break;
+                       case ACPI_TYPE_PACKAGE:
+                               if (info_obj->package.count < 2)
+                                       break;
+
+                               cnstr_pkg = info_obj->package.elements;
+                               obj = &cnstr_pkg[0];
+                               dev_info.uid = obj->integer.value;
+                               obj = &cnstr_pkg[1];
+                               dev_info.min_dstate = obj->integer.value;
+
+                               acpi_handle_debug(lps0_device_handle,
+                                       "uid:%d min_dstate:%s\n",
+                                       dev_info.uid,
+                                       acpi_power_state_string(dev_info.min_dstate));
+
+                               constraint->min_dstate = dev_info.min_dstate;
+                               break;
+                       }
+               }
+
+               if (constraint->min_dstate < 0) {
+                       acpi_handle_debug(lps0_device_handle,
+                                         "Incomplete constraint defined\n");
+                       continue;
+               }
+
+               lpi_constraints_table_size++;
+       }
+
+       acpi_handle_debug(lps0_device_handle, "LPI: constraints list end\n");
+
+free_acpi_buffer:
+       ACPI_FREE(out_obj);
+}
+
+static void lpi_check_constraints(void)
+{
+       int i;
+
+       for (i = 0; i < lpi_constraints_table_size; ++i) {
+               acpi_handle handle = lpi_constraints_table[i].handle;
+               struct acpi_device *adev;
+
+               if (!handle || acpi_bus_get_device(handle, &adev))
+                       continue;
+
+               acpi_handle_debug(handle,
+                       "LPI: required min power state:%s current power state:%s\n",
+                       acpi_power_state_string(lpi_constraints_table[i].min_dstate),
+                       acpi_power_state_string(adev->power.state));
+
+               if (!adev->flags.power_manageable) {
+                       acpi_handle_info(handle, "LPI: Device not power manageable\n");
+                       lpi_constraints_table[i].handle = NULL;
+                       continue;
+               }
+
+               if (adev->power.state < lpi_constraints_table[i].min_dstate)
+                       acpi_handle_info(handle,
+                               "LPI: Constraint not met; min power state:%s current power state:%s\n",
+                               acpi_power_state_string(lpi_constraints_table[i].min_dstate),
+                               acpi_power_state_string(adev->power.state));
+       }
+}
+
+static void acpi_sleep_run_lps0_dsm(unsigned int func)
+{
+       union acpi_object *out_obj;
+
+       if (!(lps0_dsm_func_mask & (1 << func)))
+               return;
+
+       out_obj = acpi_evaluate_dsm(lps0_device_handle, &lps0_dsm_guid, rev_id, func, NULL);
+       ACPI_FREE(out_obj);
+
+       acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n",
+                         func, out_obj ? "successful" : "failed");
+}
+
+static bool acpi_s2idle_vendor_amd(void)
+{
+       return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
+}
+
+static int lps0_device_attach(struct acpi_device *adev,
+                             const struct acpi_device_id *not_used)
+{
+       union acpi_object *out_obj;
+
+       if (lps0_device_handle)
+               return 0;
+
+       if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0))
+               return 0;
+
+       if (acpi_s2idle_vendor_amd()) {
+               guid_parse(ACPI_LPS0_DSM_UUID_AMD, &lps0_dsm_guid);
+               out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 0, 0, NULL);
+               rev_id = 0;
+       } else {
+               guid_parse(ACPI_LPS0_DSM_UUID, &lps0_dsm_guid);
+               out_obj = acpi_evaluate_dsm(adev->handle, &lps0_dsm_guid, 1, 0, NULL);
+               rev_id = 1;
+       }
+
+       /* Check if the _DSM is present and as expected. */
+       if (!out_obj || out_obj->type != ACPI_TYPE_BUFFER) {
+               acpi_handle_debug(adev->handle,
+                                 "_DSM function 0 evaluation failed\n");
+               return 0;
+       }
+
+       lps0_dsm_func_mask = *(char *)out_obj->buffer.pointer;
+
+       ACPI_FREE(out_obj);
+
+       acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n",
+                         lps0_dsm_func_mask);
+
+       lps0_device_handle = adev->handle;
+
+       if (acpi_s2idle_vendor_amd())
+               lpi_device_get_constraints_amd();
+       else
+               lpi_device_get_constraints();
+
+       /*
+        * Use suspend-to-idle by default if the default suspend mode was not
+        * set from the command line.
+        */
+       if (mem_sleep_default > PM_SUSPEND_MEM && !acpi_sleep_default_s3)
+               mem_sleep_current = PM_SUSPEND_TO_IDLE;
+
+       /*
+        * Some LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U, require the
+        * EC GPE to be enabled while suspended for certain wakeup devices to
+        * work, so mark it as wakeup-capable.
+        */
+       acpi_ec_mark_gpe_for_wake();
+
+       return 0;
+}
+
+static struct acpi_scan_handler lps0_handler = {
+       .ids = lps0_device_ids,
+       .attach = lps0_device_attach,
+};
+
+int acpi_s2idle_prepare_late(void)
+{
+       if (!lps0_device_handle || sleep_no_lps0)
+               return 0;
+
+       if (pm_debug_messages_on)
+               lpi_check_constraints();
+
+       if (acpi_s2idle_vendor_amd()) {
+               acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF_AMD);
+       } else {
+               acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF);
+               acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY);
+       }
+
+       return 0;
+}
+
+void acpi_s2idle_restore_early(void)
+{
+       if (!lps0_device_handle || sleep_no_lps0)
+               return;
+
+       if (acpi_s2idle_vendor_amd()) {
+               acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON_AMD);
+       } else {
+               acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT);
+               acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON);
+       }
+}
+
+static const struct platform_s2idle_ops acpi_s2idle_ops_lps0 = {
+       .begin = acpi_s2idle_begin,
+       .prepare = acpi_s2idle_prepare,
+       .prepare_late = acpi_s2idle_prepare_late,
+       .wake = acpi_s2idle_wake,
+       .restore_early = acpi_s2idle_restore_early,
+       .restore = acpi_s2idle_restore,
+       .end = acpi_s2idle_end,
+};
+
+void acpi_s2idle_setup(void)
+{
+       acpi_scan_add_handler(&lps0_handler);
+       s2idle_set_ops(&acpi_s2idle_ops_lps0);
+}
+
+#endif /* CONFIG_SUSPEND */
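
The LPS0 code above gates every _DSM evaluation on the function mask returned by function 0: the first byte of the function-0 buffer is a bitmask of implemented functions, and acpi_sleep_run_lps0_dsm() tests the corresponding bit before calling. A minimal stand-alone C sketch of that gating logic (the mask value and function numbers are invented for illustration):

    #include <stdio.h>

    /* Hypothetical function indices standing in for ACPI_LPS0_SCREEN_OFF etc. */
    enum { FUNC_SCREEN_OFF = 3, FUNC_SCREEN_ON = 4, FUNC_ENTRY = 5, FUNC_EXIT = 6 };

    static unsigned int dsm_func_mask;  /* first byte of the function-0 buffer */

    static void run_dsm(unsigned int func)
    {
            /* Same gating test as acpi_sleep_run_lps0_dsm() above. */
            if (!(dsm_func_mask & (1u << func))) {
                    printf("function %u not advertised, skipped\n", func);
                    return;
            }
            printf("would evaluate _DSM function %u\n", func);
    }

    int main(void)
    {
            dsm_func_mask = 0x78;       /* pretend firmware advertises functions 3..6 */
            run_dsm(FUNC_SCREEN_OFF);   /* bit 3 set: evaluated */
            run_dsm(7);                 /* bit 7 clear: call is skipped */
            return 0;
    }
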
index 188e0b4..5265975 100644 (file)
@@ -2462,6 +2462,7 @@ static void blkback_changed(struct xenbus_device *dev,
                        break;
                if (talk_to_blkback(dev, info))
                        break;
+               break;
        case XenbusStateInitialising:
        case XenbusStateInitialised:
        case XenbusStateReconfiguring:
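
For context on why that one added break matters: without it, control falls straight through from the arm above into the XenbusStateInitialising group below. A tiny stand-alone C illustration of the same fallthrough hazard (state names and handlers are invented):

    #include <stdio.h>

    enum state { INITIALISING, CONNECTED, CLOSED };

    static void changed(enum state s, int missing_break)
    {
            switch (s) {
            case CONNECTED:
                    printf("handle CONNECTED\n");
                    if (!missing_break)
                            break;      /* the break added in blkback_changed() */
                    /* fall through */
            case INITIALISING:
                    printf("handle INITIALISING\n");  /* runs unexpectedly on fallthrough */
                    break;
            case CLOSED:
                    break;
            }
    }

    int main(void)
    {
            changed(CONNECTED, 1);      /* prints both messages: the bug */
            changed(CONNECTED, 0);      /* prints only the CONNECTED message */
            return 0;
    }
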
index cb2497d..90ed8c7 100644 (file)
@@ -1,7 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
-agpgart-y := backend.o frontend.o generic.o isoch.o
+agpgart-y := backend.o generic.o isoch.o
 
+ifeq ($(CONFIG_DRM_LEGACY),y)
 agpgart-$(CONFIG_COMPAT)       += compat_ioctl.o
+agpgart-y                      += frontend.o
+endif
+
 
 obj-$(CONFIG_AGP)              += agpgart.o
 obj-$(CONFIG_AGP_ALI)          += ali-agp.o
index 4eb1c77..bb09d64 100644 (file)
@@ -186,8 +186,13 @@ int agp_add_bridge(struct agp_bridge_data *bridge);
 void agp_remove_bridge(struct agp_bridge_data *bridge);
 
 /* Frontend routines. */
+#if IS_ENABLED(CONFIG_DRM_LEGACY)
 int agp_frontend_initialize(void);
 void agp_frontend_cleanup(void);
+#else
+static inline int agp_frontend_initialize(void) { return 0; }
+static inline void agp_frontend_cleanup(void) {}
+#endif
 
 /* Generic routines. */
 void agp_generic_enable(struct agp_bridge_data *bridge, u32 mode);
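
The agp.h hunk uses a standard kernel idiom: declare the real functions when the option is enabled and provide empty static inline stubs otherwise, so call sites never need an #ifdef of their own. A compilable sketch of the same pattern, with a made-up CONFIG_FRONTEND macro standing in for IS_ENABLED(CONFIG_DRM_LEGACY):

    #include <stdio.h>

    /* #define CONFIG_FRONTEND 1 */    /* toggle to link the real implementation */

    #ifdef CONFIG_FRONTEND
    int frontend_initialize(void);     /* real versions live elsewhere */
    void frontend_cleanup(void);
    #else
    static inline int frontend_initialize(void) { return 0; }  /* no-op stubs */
    static inline void frontend_cleanup(void) {}
    #endif

    int main(void)
    {
            if (frontend_initialize())  /* caller stays #ifdef-free */
                    return 1;
            frontend_cleanup();
            printf("done\n");
            return 0;
    }
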
index 17c1df8..1fe006f 100644 (file)
@@ -528,15 +528,15 @@ endif # HW_RANDOM
 
 config UML_RANDOM
        depends on UML
-       tristate "Hardware random number generator"
+       select HW_RANDOM
+       tristate "UML Random Number Generator support"
        help
          This option enables UML's "hardware" random number generator.  It
          attaches itself to the host's /dev/random, supplying as much entropy
          as the host has, rather than the small amount the UML gets from its
-         own drivers.  It registers itself as a standard hardware random number
-         generator, major 10, minor 183, and the canonical device name is
-         /dev/hwrng.
-         The way to make use of this is to install the rng-tools package
-         (check your distro, or download from
-         http://sourceforge.net/projects/gkernel/).  rngd periodically reads
-         /dev/hwrng and injects the entropy into /dev/random.
+         own drivers. It registers itself as an rng-core driver, thus
+         providing a device which is usually called /dev/hwrng. This
+         hardware random number generator feeds into the kernel's random
+         number generator entropy pool.
+
+         If unsure, say Y.
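
With this change the device is exposed through the common rng-core path. A minimal sketch of consuming it from user space, assuming /dev/hwrng exists and is readable (for example inside the UML guest):

    #include <stdio.h>

    int main(void)
    {
            unsigned char buf[16];
            FILE *f = fopen("/dev/hwrng", "rb");  /* device registered by rng-core */

            if (!f) {
                    perror("/dev/hwrng");
                    return 1;
            }
            if (fread(buf, 1, sizeof(buf), f) == sizeof(buf)) {
                    for (size_t i = 0; i < sizeof(buf); i++)
                            printf("%02x", buf[i]);
                    putchar('\n');
            }
            fclose(f);
            return 0;
    }
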
index c715d46..85856cf 100644 (file)
@@ -188,6 +188,14 @@ config COMMON_CLK_CS2000_CP
        help
          If you say yes here you get support for the CS2000 clock multiplier.
 
+config COMMON_CLK_FSL_FLEXSPI
+       tristate "Clock driver for FlexSPI on Layerscape SoCs"
+       depends on ARCH_LAYERSCAPE || COMPILE_TEST
+       default ARCH_LAYERSCAPE && SPI_NXP_FLEXSPI
+       help
+         On Layerscape SoCs there is a special clock for the FlexSPI
+         interface.
+
 config COMMON_CLK_FSL_SAI
        bool "Clock driver for BCLK of Freescale SAI cores"
        depends on ARCH_LAYERSCAPE || COMPILE_TEST
@@ -246,7 +254,8 @@ config COMMON_CLK_AXI_CLKGEN
 
 config CLK_QORIQ
        bool "Clock driver for Freescale QorIQ platforms"
-       depends on (PPC_E500MC || ARM || ARM64 || COMPILE_TEST) && OF
+       depends on OF
+       depends on PPC_E500MC || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
        help
          This adds the clock driver support for Freescale QorIQ platforms
          using common clock framework.
index da8fcf1..dbdc590 100644 (file)
@@ -30,6 +30,7 @@ obj-$(CONFIG_COMMON_CLK_CS2000_CP)    += clk-cs2000-cp.o
 obj-$(CONFIG_ARCH_EFM32)               += clk-efm32gg.o
 obj-$(CONFIG_ARCH_SPARX5)              += clk-sparx5.o
 obj-$(CONFIG_COMMON_CLK_FIXED_MMIO)    += clk-fixed-mmio.o
+obj-$(CONFIG_COMMON_CLK_FSL_FLEXSPI)   += clk-fsl-flexspi.o
 obj-$(CONFIG_COMMON_CLK_FSL_SAI)       += clk-fsl-sai.o
 obj-$(CONFIG_COMMON_CLK_GEMINI)                += clk-gemini.o
 obj-$(CONFIG_COMMON_CLK_ASPEED)                += clk-aspeed.o
index 2c3d8e6..0fad100 100644 (file)
@@ -7,6 +7,8 @@
 
 #include "pmc.h"
 
+static DEFINE_SPINLOCK(rm9200_mck_lock);
+
 struct sck {
        char *n;
        char *p;
@@ -137,9 +139,20 @@ static void __init at91rm9200_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "pllack";
        parent_names[3] = "pllbck";
-       hw = at91_clk_register_master(regmap, "masterck", 4, parent_names,
-                                     &at91rm9200_master_layout,
-                                     &rm9200_mck_characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4,
+                                          parent_names,
+                                          &at91rm9200_master_layout,
+                                          &rm9200_mck_characteristics,
+                                          &rm9200_mck_lock, CLK_SET_RATE_GATE,
+                                          INT_MIN);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       hw = at91_clk_register_master_div(regmap, "masterck_div",
+                                         "masterck_pres",
+                                         &at91rm9200_master_layout,
+                                         &rm9200_mck_characteristics,
+                                         &rm9200_mck_lock, CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
@@ -181,7 +194,7 @@ static void __init at91rm9200_pmc_setup(struct device_node *np)
        for (i = 0; i < ARRAY_SIZE(at91rm9200_periphck); i++) {
                hw = at91_clk_register_peripheral(regmap,
                                                  at91rm9200_periphck[i].n,
-                                                 "masterck",
+                                                 "masterck_div",
                                                  at91rm9200_periphck[i].id);
                if (IS_ERR(hw))
                        goto err_free;
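
This masterck_pres/masterck_div split recurs in every PMC conversion below: the old monolithic masterck becomes a prescaler stage (masterck_pres) feeding a divider stage (masterck_div), and consumers are re-parented to masterck_div. A small arithmetic sketch of the resulting rate chain (the numbers and table contents are illustrative only):

    #include <stdio.h>

    /* pres is a power-of-two shift; div_idx indexes a divisor table like
     * clk_master_characteristics.divisors[]. Example values below. */
    static const unsigned int divisors[] = { 1, 2, 4, 3 };

    static unsigned long masterck_rate(unsigned long parent, unsigned int pres,
                                       unsigned int div_idx)
    {
            unsigned long pres_rate = parent >> pres;   /* masterck_pres */

            return pres_rate / divisors[div_idx];       /* masterck_div */
    }

    int main(void)
    {
            /* 200 MHz PLL, prescaler /2 (pres = 1), divider /3 (index 3) */
            printf("%lu Hz\n", masterck_rate(200000000UL, 1, 3));  /* 33333333 */
            return 0;
    }
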
index bb81ff7..ceb5495 100644 (file)
@@ -32,6 +32,8 @@ struct at91sam926x_data {
        bool has_slck;
 };
 
+static DEFINE_SPINLOCK(at91sam9260_mck_lock);
+
 static const struct clk_master_characteristics sam9260_mck_characteristics = {
        .output = { .min = 0, .max = 105000000 },
        .divisors = { 1, 2, 4, 0 },
@@ -218,8 +220,8 @@ static const struct sck at91sam9261_systemck[] = {
        { .n = "pck1",  .p = "prog1",    .id = 9 },
        { .n = "pck2",  .p = "prog2",    .id = 10 },
        { .n = "pck3",  .p = "prog3",    .id = 11 },
-       { .n = "hclk0", .p = "masterck", .id = 16 },
-       { .n = "hclk1", .p = "masterck", .id = 17 },
+       { .n = "hclk0", .p = "masterck_div", .id = 16 },
+       { .n = "hclk1", .p = "masterck_div", .id = 17 },
 };
 
 static const struct pck at91sam9261_periphck[] = {
@@ -413,9 +415,21 @@ static void __init at91sam926x_pmc_setup(struct device_node *np,
        parent_names[1] = "mainck";
        parent_names[2] = "pllack";
        parent_names[3] = "pllbck";
-       hw = at91_clk_register_master(regmap, "masterck", 4, parent_names,
-                                     &at91rm9200_master_layout,
-                                     data->mck_characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4,
+                                          parent_names,
+                                          &at91rm9200_master_layout,
+                                          data->mck_characteristics,
+                                          &at91sam9260_mck_lock,
+                                          CLK_SET_RATE_GATE, INT_MIN);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       hw = at91_clk_register_master_div(regmap, "masterck_div",
+                                         "masterck_pres",
+                                         &at91rm9200_master_layout,
+                                         data->mck_characteristics,
+                                         &at91sam9260_mck_lock,
+                                         CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
@@ -457,7 +471,7 @@ static void __init at91sam926x_pmc_setup(struct device_node *np,
        for (i = 0; i < data->num_pck; i++) {
                hw = at91_clk_register_peripheral(regmap,
                                                  data->pck[i].n,
-                                                 "masterck",
+                                                 "masterck_div",
                                                  data->pck[i].id);
                if (IS_ERR(hw))
                        goto err_free;
index cb4a406..0214333 100644 (file)
@@ -7,6 +7,8 @@
 
 #include "pmc.h"
 
+static DEFINE_SPINLOCK(at91sam9g45_mck_lock);
+
 static const struct clk_master_characteristics mck_characteristics = {
        .output = { .min = 0, .max = 133333333 },
        .divisors = { 1, 2, 4, 3 },
@@ -40,10 +42,10 @@ static const struct {
        char *p;
        u8 id;
 } at91sam9g45_systemck[] = {
-       { .n = "ddrck", .p = "masterck", .id = 2 },
-       { .n = "uhpck", .p = "usbck",    .id = 6 },
-       { .n = "pck0",  .p = "prog0",    .id = 8 },
-       { .n = "pck1",  .p = "prog1",    .id = 9 },
+       { .n = "ddrck", .p = "masterck_div", .id = 2 },
+       { .n = "uhpck", .p = "usbck",        .id = 6 },
+       { .n = "pck0",  .p = "prog0",        .id = 8 },
+       { .n = "pck1",  .p = "prog1",        .id = 9 },
 };
 
 struct pck {
@@ -148,9 +150,21 @@ static void __init at91sam9g45_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       hw = at91_clk_register_master(regmap, "masterck", 4, parent_names,
-                                     &at91rm9200_master_layout,
-                                     &mck_characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4,
+                                          parent_names,
+                                          &at91rm9200_master_layout,
+                                          &mck_characteristics,
+                                          &at91sam9g45_mck_lock,
+                                          CLK_SET_RATE_GATE, INT_MIN);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       hw = at91_clk_register_master_div(regmap, "masterck_div",
+                                         "masterck_pres",
+                                         &at91rm9200_master_layout,
+                                         &mck_characteristics,
+                                         &at91sam9g45_mck_lock,
+                                         CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
@@ -166,7 +180,7 @@ static void __init at91sam9g45_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       parent_names[4] = "masterck";
+       parent_names[4] = "masterck_div";
        for (i = 0; i < 2; i++) {
                char name[6];
 
@@ -195,7 +209,7 @@ static void __init at91sam9g45_pmc_setup(struct device_node *np)
        for (i = 0; i < ARRAY_SIZE(at91sam9g45_periphck); i++) {
                hw = at91_clk_register_peripheral(regmap,
                                                  at91sam9g45_periphck[i].n,
-                                                 "masterck",
+                                                 "masterck_div",
                                                  at91sam9g45_periphck[i].id);
                if (IS_ERR(hw))
                        goto err_free;
index 93f7eb2..f9db531 100644 (file)
@@ -7,6 +7,8 @@
 
 #include "pmc.h"
 
+static DEFINE_SPINLOCK(at91sam9n12_mck_lock);
+
 static const struct clk_master_characteristics mck_characteristics = {
        .output = { .min = 0, .max = 133333333 },
        .divisors = { 1, 2, 4, 3 },
@@ -54,12 +56,12 @@ static const struct {
        char *p;
        u8 id;
 } at91sam9n12_systemck[] = {
-       { .n = "ddrck", .p = "masterck", .id = 2 },
-       { .n = "lcdck", .p = "masterck", .id = 3 },
-       { .n = "uhpck", .p = "usbck",    .id = 6 },
-       { .n = "udpck", .p = "usbck",    .id = 7 },
-       { .n = "pck0",  .p = "prog0",    .id = 8 },
-       { .n = "pck1",  .p = "prog1",    .id = 9 },
+       { .n = "ddrck", .p = "masterck_div", .id = 2 },
+       { .n = "lcdck", .p = "masterck_div", .id = 3 },
+       { .n = "uhpck", .p = "usbck",        .id = 6 },
+       { .n = "udpck", .p = "usbck",        .id = 7 },
+       { .n = "pck0",  .p = "prog0",        .id = 8 },
+       { .n = "pck1",  .p = "prog1",        .id = 9 },
 };
 
 static const struct clk_pcr_layout at91sam9n12_pcr_layout = {
@@ -175,9 +177,21 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "pllbck";
-       hw = at91_clk_register_master(regmap, "masterck", 4, parent_names,
-                                     &at91sam9x5_master_layout,
-                                     &mck_characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4,
+                                          parent_names,
+                                          &at91sam9x5_master_layout,
+                                          &mck_characteristics,
+                                          &at91sam9n12_mck_lock,
+                                          CLK_SET_RATE_GATE, INT_MIN);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       hw = at91_clk_register_master_div(regmap, "masterck_div",
+                                         "masterck_pres",
+                                         &at91sam9x5_master_layout,
+                                         &mck_characteristics,
+                                         &at91sam9n12_mck_lock,
+                                         CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
@@ -191,7 +205,7 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "pllbck";
-       parent_names[4] = "masterck";
+       parent_names[4] = "masterck_div";
        for (i = 0; i < 2; i++) {
                char name[6];
 
@@ -221,7 +235,7 @@ static void __init at91sam9n12_pmc_setup(struct device_node *np)
                hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock,
                                                         &at91sam9n12_pcr_layout,
                                                         at91sam9n12_periphck[i].n,
-                                                        "masterck",
+                                                        "masterck_div",
                                                         at91sam9n12_periphck[i].id,
                                                         &range, INT_MIN);
                if (IS_ERR(hw))
index a343eb6..66736e0 100644 (file)
@@ -7,6 +7,8 @@
 
 #include "pmc.h"
 
+static DEFINE_SPINLOCK(sam9rl_mck_lock);
+
 static const struct clk_master_characteristics sam9rl_mck_characteristics = {
        .output = { .min = 0, .max = 94000000 },
        .divisors = { 1, 2, 4, 0 },
@@ -117,9 +119,20 @@ static void __init at91sam9rl_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "pllack";
        parent_names[3] = "utmick";
-       hw = at91_clk_register_master(regmap, "masterck", 4, parent_names,
-                                     &at91rm9200_master_layout,
-                                     &sam9rl_mck_characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4,
+                                          parent_names,
+                                          &at91rm9200_master_layout,
+                                          &sam9rl_mck_characteristics,
+                                          &sam9rl_mck_lock, CLK_SET_RATE_GATE,
+                                          INT_MIN);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       hw = at91_clk_register_master_div(regmap, "masterck_div",
+                                         "masterck_pres",
+                                         &at91rm9200_master_layout,
+                                         &sam9rl_mck_characteristics,
+                                         &sam9rl_mck_lock, CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
@@ -129,7 +142,7 @@ static void __init at91sam9rl_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "pllack";
        parent_names[3] = "utmick";
-       parent_names[4] = "masterck";
+       parent_names[4] = "masterck_div";
        for (i = 0; i < 2; i++) {
                char name[6];
 
@@ -158,7 +171,7 @@ static void __init at91sam9rl_pmc_setup(struct device_node *np)
        for (i = 0; i < ARRAY_SIZE(at91sam9rl_periphck); i++) {
                hw = at91_clk_register_peripheral(regmap,
                                                  at91sam9rl_periphck[i].n,
-                                                 "masterck",
+                                                 "masterck_div",
                                                  at91sam9rl_periphck[i].id);
                if (IS_ERR(hw))
                        goto err_free;
index 22b9aad..79b9d36 100644 (file)
@@ -7,6 +7,8 @@
 
 #include "pmc.h"
 
+static DEFINE_SPINLOCK(mck_lock);
+
 static const struct clk_master_characteristics mck_characteristics = {
        .output = { .min = 0, .max = 133333333 },
        .divisors = { 1, 2, 4, 3 },
@@ -41,7 +43,7 @@ static const struct {
        char *p;
        u8 id;
 } at91sam9x5_systemck[] = {
-       { .n = "ddrck", .p = "masterck", .id = 2 },
+       { .n = "ddrck", .p = "masterck_div", .id = 2 },
        { .n = "smdck", .p = "smdclk",   .id = 4 },
        { .n = "uhpck", .p = "usbck",    .id = 6 },
        { .n = "udpck", .p = "usbck",    .id = 7 },
@@ -196,9 +198,19 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np,
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       hw = at91_clk_register_master(regmap, "masterck", 4, parent_names,
-                                     &at91sam9x5_master_layout,
-                                     &mck_characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4,
+                                          parent_names,
+                                          &at91sam9x5_master_layout,
+                                          &mck_characteristics, &mck_lock,
+                                          CLK_SET_RATE_GATE, INT_MIN);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       hw = at91_clk_register_master_div(regmap, "masterck_div",
+                                         "masterck_pres",
+                                         &at91sam9x5_master_layout,
+                                         &mck_characteristics, &mck_lock,
+                                         CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
@@ -218,7 +230,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np,
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       parent_names[4] = "masterck";
+       parent_names[4] = "masterck_div";
        for (i = 0; i < 2; i++) {
                char name[6];
 
@@ -245,7 +257,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np,
        }
 
        if (has_lcdck) {
-               hw = at91_clk_register_system(regmap, "lcdck", "masterck", 3);
+               hw = at91_clk_register_system(regmap, "lcdck", "masterck_div", 3);
                if (IS_ERR(hw))
                        goto err_free;
 
@@ -256,7 +268,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np,
                hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock,
                                                         &at91sam9x5_pcr_layout,
                                                         at91sam9x5_periphck[i].n,
-                                                        "masterck",
+                                                        "masterck_div",
                                                         at91sam9x5_periphck[i].id,
                                                         &range, INT_MIN);
                if (IS_ERR(hw))
@@ -269,7 +281,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np,
                hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock,
                                                         &at91sam9x5_pcr_layout,
                                                         extra_pcks[i].n,
-                                                        "masterck",
+                                                        "masterck_div",
                                                         extra_pcks[i].id,
                                                         &range, INT_MIN);
                if (IS_ERR(hw))
index bd0d8a6..a804279 100644 (file)
@@ -15,7 +15,7 @@
 #define MASTER_PRES_MASK       0x7
 #define MASTER_PRES_MAX                MASTER_PRES_MASK
 #define MASTER_DIV_SHIFT       8
-#define MASTER_DIV_MASK                0x3
+#define MASTER_DIV_MASK                0x7
 
 #define PMC_MCR                        0x30
 #define PMC_MCR_ID_MSK         GENMASK(3, 0)
@@ -58,83 +58,309 @@ static inline bool clk_master_ready(struct clk_master *master)
 static int clk_master_prepare(struct clk_hw *hw)
 {
        struct clk_master *master = to_clk_master(hw);
+       unsigned long flags;
+
+       spin_lock_irqsave(master->lock, flags);
 
        while (!clk_master_ready(master))
                cpu_relax();
 
+       spin_unlock_irqrestore(master->lock, flags);
+
        return 0;
 }
 
 static int clk_master_is_prepared(struct clk_hw *hw)
 {
        struct clk_master *master = to_clk_master(hw);
+       unsigned long flags;
+       bool status;
 
-       return clk_master_ready(master);
+       spin_lock_irqsave(master->lock, flags);
+       status = clk_master_ready(master);
+       spin_unlock_irqrestore(master->lock, flags);
+
+       return status;
 }
 
-static unsigned long clk_master_recalc_rate(struct clk_hw *hw,
-                                           unsigned long parent_rate)
+static unsigned long clk_master_div_recalc_rate(struct clk_hw *hw,
+                                               unsigned long parent_rate)
 {
-       u8 pres;
        u8 div;
-       unsigned long rate = parent_rate;
+       unsigned long flags, rate = parent_rate;
        struct clk_master *master = to_clk_master(hw);
        const struct clk_master_layout *layout = master->layout;
        const struct clk_master_characteristics *characteristics =
                                                master->characteristics;
        unsigned int mckr;
 
+       spin_lock_irqsave(master->lock, flags);
        regmap_read(master->regmap, master->layout->offset, &mckr);
+       spin_unlock_irqrestore(master->lock, flags);
+
        mckr &= layout->mask;
 
-       pres = (mckr >> layout->pres_shift) & MASTER_PRES_MASK;
        div = (mckr >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK;
 
-       if (characteristics->have_div3_pres && pres == MASTER_PRES_MAX)
-               rate /= 3;
-       else
-               rate >>= pres;
-
        rate /= characteristics->divisors[div];
 
        if (rate < characteristics->output.min)
-               pr_warn("master clk is underclocked");
+               pr_warn("master clk div is underclocked");
        else if (rate > characteristics->output.max)
-               pr_warn("master clk is overclocked");
+               pr_warn("master clk div is overclocked");
 
        return rate;
 }
 
-static u8 clk_master_get_parent(struct clk_hw *hw)
+static const struct clk_ops master_div_ops = {
+       .prepare = clk_master_prepare,
+       .is_prepared = clk_master_is_prepared,
+       .recalc_rate = clk_master_div_recalc_rate,
+};
+
+static int clk_master_div_set_rate(struct clk_hw *hw, unsigned long rate,
+                                  unsigned long parent_rate)
+{
+       struct clk_master *master = to_clk_master(hw);
+       const struct clk_master_characteristics *characteristics =
+                                               master->characteristics;
+       unsigned long flags;
+       int div, i;
+
+       div = DIV_ROUND_CLOSEST(parent_rate, rate);
+       if (div > ARRAY_SIZE(characteristics->divisors))
+               return -EINVAL;
+
+       for (i = 0; i < ARRAY_SIZE(characteristics->divisors); i++) {
+               if (!characteristics->divisors[i])
+                       break;
+
+               if (div == characteristics->divisors[i]) {
+                       div = i;
+                       break;
+               }
+       }
+
+       if (i == ARRAY_SIZE(characteristics->divisors))
+               return -EINVAL;
+
+       spin_lock_irqsave(master->lock, flags);
+       regmap_update_bits(master->regmap, master->layout->offset,
+                          (MASTER_DIV_MASK << MASTER_DIV_SHIFT),
+                          (div << MASTER_DIV_SHIFT));
+       while (!clk_master_ready(master))
+               cpu_relax();
+       spin_unlock_irqrestore(master->lock, flags);
+
+       return 0;
+}
+
+static int clk_master_div_determine_rate(struct clk_hw *hw,
+                                        struct clk_rate_request *req)
+{
+       struct clk_master *master = to_clk_master(hw);
+       const struct clk_master_characteristics *characteristics =
+                                               master->characteristics;
+       struct clk_hw *parent;
+       unsigned long parent_rate, tmp_rate, best_rate = 0;
+       int i, best_diff = INT_MIN, tmp_diff;
+
+       parent = clk_hw_get_parent(hw);
+       if (!parent)
+               return -EINVAL;
+
+       parent_rate = clk_hw_get_rate(parent);
+       if (!parent_rate)
+               return -EINVAL;
+
+       for (i = 0; i < ARRAY_SIZE(characteristics->divisors); i++) {
+               if (!characteristics->divisors[i])
+                       break;
+
+               tmp_rate = DIV_ROUND_CLOSEST_ULL(parent_rate,
+                                                characteristics->divisors[i]);
+               tmp_diff = abs(tmp_rate - req->rate);
+
+               if (!best_rate || best_diff > tmp_diff) {
+                       best_diff = tmp_diff;
+                       best_rate = tmp_rate;
+               }
+
+               if (!best_diff)
+                       break;
+       }
+
+       req->best_parent_rate = best_rate;
+       req->best_parent_hw = parent;
+       req->rate = best_rate;
+
+       return 0;
+}
+
+static const struct clk_ops master_div_ops_chg = {
+       .prepare = clk_master_prepare,
+       .is_prepared = clk_master_is_prepared,
+       .recalc_rate = clk_master_div_recalc_rate,
+       .determine_rate = clk_master_div_determine_rate,
+       .set_rate = clk_master_div_set_rate,
+};
+
+static void clk_sama7g5_master_best_diff(struct clk_rate_request *req,
+                                        struct clk_hw *parent,
+                                        unsigned long parent_rate,
+                                        long *best_rate,
+                                        long *best_diff,
+                                        u32 div)
+{
+       unsigned long tmp_rate, tmp_diff;
+
+       if (div == MASTER_PRES_MAX)
+               tmp_rate = parent_rate / 3;
+       else
+               tmp_rate = parent_rate >> div;
+
+       tmp_diff = abs(req->rate - tmp_rate);
+
+       if (*best_diff < 0 || *best_diff >= tmp_diff) {
+               *best_rate = tmp_rate;
+               *best_diff = tmp_diff;
+               req->best_parent_rate = parent_rate;
+               req->best_parent_hw = parent;
+       }
+}
+
+static int clk_master_pres_determine_rate(struct clk_hw *hw,
+                                         struct clk_rate_request *req)
 {
        struct clk_master *master = to_clk_master(hw);
+       struct clk_rate_request req_parent = *req;
+       const struct clk_master_characteristics *characteristics =
+                                                       master->characteristics;
+       struct clk_hw *parent;
+       long best_rate = LONG_MIN, best_diff = LONG_MIN;
+       u32 pres;
+       int i;
+
+       if (master->chg_pid < 0)
+               return -EOPNOTSUPP;
+
+       parent = clk_hw_get_parent_by_index(hw, master->chg_pid);
+       if (!parent)
+               return -EOPNOTSUPP;
+
+       for (i = 0; i <= MASTER_PRES_MAX; i++) {
+               if (characteristics->have_div3_pres && i == MASTER_PRES_MAX)
+                       pres = 3;
+               else
+                       pres = 1 << i;
+
+               req_parent.rate = req->rate * pres;
+               if (__clk_determine_rate(parent, &req_parent))
+                       continue;
+
+               clk_sama7g5_master_best_diff(req, parent, req_parent.rate,
+                                            &best_rate, &best_diff, i);
+               if (!best_diff)
+                       break;
+       }
+
+       req->rate = best_rate;
+
+       return 0;
+}
+
+static int clk_master_pres_set_rate(struct clk_hw *hw, unsigned long rate,
+                                   unsigned long parent_rate)
+{
+       struct clk_master *master = to_clk_master(hw);
+       unsigned long flags;
+       unsigned int pres;
+
+       pres = DIV_ROUND_CLOSEST(parent_rate, rate);
+       if (pres > MASTER_PRES_MAX)
+               return -EINVAL;
+
+       if (pres == 3)
+               pres = MASTER_PRES_MAX;
+       else
+               pres = ffs(pres) - 1;
+
+       spin_lock_irqsave(master->lock, flags);
+       regmap_update_bits(master->regmap, master->layout->offset,
+                          (MASTER_PRES_MASK << master->layout->pres_shift),
+                          (pres << master->layout->pres_shift));
+
+       while (!clk_master_ready(master))
+               cpu_relax();
+       spin_unlock_irqrestore(master->lock, flags);
+
+       return 0;
+}
+
+static unsigned long clk_master_pres_recalc_rate(struct clk_hw *hw,
+                                                unsigned long parent_rate)
+{
+       struct clk_master *master = to_clk_master(hw);
+       const struct clk_master_characteristics *characteristics =
+                                               master->characteristics;
+       unsigned long flags;
+       unsigned int val, pres;
+
+       spin_lock_irqsave(master->lock, flags);
+       regmap_read(master->regmap, master->layout->offset, &val);
+       spin_unlock_irqrestore(master->lock, flags);
+
+       pres = (val >> master->layout->pres_shift) & MASTER_PRES_MASK;
+       if (pres == MASTER_PRES_MAX && characteristics->have_div3_pres)
+               pres = 3;
+       else
+               pres = (1 << pres);
+
+       return DIV_ROUND_CLOSEST_ULL(parent_rate, pres);
+}
+
+static u8 clk_master_pres_get_parent(struct clk_hw *hw)
+{
+       struct clk_master *master = to_clk_master(hw);
+       unsigned long flags;
        unsigned int mckr;
 
+       spin_lock_irqsave(master->lock, flags);
        regmap_read(master->regmap, master->layout->offset, &mckr);
+       spin_unlock_irqrestore(master->lock, flags);
 
        return mckr & AT91_PMC_CSS;
 }
 
-static const struct clk_ops master_ops = {
+static const struct clk_ops master_pres_ops = {
        .prepare = clk_master_prepare,
        .is_prepared = clk_master_is_prepared,
-       .recalc_rate = clk_master_recalc_rate,
-       .get_parent = clk_master_get_parent,
+       .recalc_rate = clk_master_pres_recalc_rate,
+       .get_parent = clk_master_pres_get_parent,
 };
 
-struct clk_hw * __init
-at91_clk_register_master(struct regmap *regmap,
+static const struct clk_ops master_pres_ops_chg = {
+       .prepare = clk_master_prepare,
+       .is_prepared = clk_master_is_prepared,
+       .determine_rate = clk_master_pres_determine_rate,
+       .recalc_rate = clk_master_pres_recalc_rate,
+       .get_parent = clk_master_pres_get_parent,
+       .set_rate = clk_master_pres_set_rate,
+};
+
+static struct clk_hw * __init
+at91_clk_register_master_internal(struct regmap *regmap,
                const char *name, int num_parents,
                const char **parent_names,
                const struct clk_master_layout *layout,
-               const struct clk_master_characteristics *characteristics)
+               const struct clk_master_characteristics *characteristics,
+               const struct clk_ops *ops, spinlock_t *lock, u32 flags,
+               int chg_pid)
 {
        struct clk_master *master;
        struct clk_init_data init;
        struct clk_hw *hw;
        int ret;
 
-       if (!name || !num_parents || !parent_names)
+       if (!name || !num_parents || !parent_names || !lock)
                return ERR_PTR(-EINVAL);
 
        master = kzalloc(sizeof(*master), GFP_KERNEL);
@@ -142,15 +368,17 @@ at91_clk_register_master(struct regmap *regmap,
                return ERR_PTR(-ENOMEM);
 
        init.name = name;
-       init.ops = &master_ops;
+       init.ops = ops;
        init.parent_names = parent_names;
        init.num_parents = num_parents;
-       init.flags = 0;
+       init.flags = flags;
 
        master->hw.init = &init;
        master->layout = layout;
        master->characteristics = characteristics;
        master->regmap = regmap;
+       master->chg_pid = chg_pid;
+       master->lock = lock;
 
        hw = &master->hw;
        ret = clk_hw_register(NULL, &master->hw);
@@ -162,37 +390,54 @@ at91_clk_register_master(struct regmap *regmap,
        return hw;
 }
 
-static unsigned long
-clk_sama7g5_master_recalc_rate(struct clk_hw *hw,
-                              unsigned long parent_rate)
+struct clk_hw * __init
+at91_clk_register_master_pres(struct regmap *regmap,
+               const char *name, int num_parents,
+               const char **parent_names,
+               const struct clk_master_layout *layout,
+               const struct clk_master_characteristics *characteristics,
+               spinlock_t *lock, u32 flags, int chg_pid)
 {
-       struct clk_master *master = to_clk_master(hw);
+       const struct clk_ops *ops;
 
-       return DIV_ROUND_CLOSEST_ULL(parent_rate, (1 << master->div));
+       if (flags & CLK_SET_RATE_GATE)
+               ops = &master_pres_ops;
+       else
+               ops = &master_pres_ops_chg;
+
+       return at91_clk_register_master_internal(regmap, name, num_parents,
+                                                parent_names, layout,
+                                                characteristics, ops,
+                                                lock, flags, chg_pid);
 }
 
-static void clk_sama7g5_master_best_diff(struct clk_rate_request *req,
-                                        struct clk_hw *parent,
-                                        unsigned long parent_rate,
-                                        long *best_rate,
-                                        long *best_diff,
-                                        u32 div)
+struct clk_hw * __init
+at91_clk_register_master_div(struct regmap *regmap,
+               const char *name, const char *parent_name,
+               const struct clk_master_layout *layout,
+               const struct clk_master_characteristics *characteristics,
+               spinlock_t *lock, u32 flags)
 {
-       unsigned long tmp_rate, tmp_diff;
+       const struct clk_ops *ops;
 
-       if (div == MASTER_PRES_MAX)
-               tmp_rate = parent_rate / 3;
+       if (flags & CLK_SET_RATE_GATE)
+               ops = &master_div_ops;
        else
-               tmp_rate = parent_rate >> div;
+               ops = &master_div_ops_chg;
 
-       tmp_diff = abs(req->rate - tmp_rate);
+       return at91_clk_register_master_internal(regmap, name, 1,
+                                                &parent_name, layout,
+                                                characteristics, ops,
+                                                lock, flags, -EINVAL);
+}
 
-       if (*best_diff < 0 || *best_diff >= tmp_diff) {
-               *best_rate = tmp_rate;
-               *best_diff = tmp_diff;
-               req->best_parent_rate = parent_rate;
-               req->best_parent_hw = parent;
-       }
+static unsigned long
+clk_sama7g5_master_recalc_rate(struct clk_hw *hw,
+                              unsigned long parent_rate)
+{
+       struct clk_master *master = to_clk_master(hw);
+
+       return DIV_ROUND_CLOSEST_ULL(parent_rate, (1 << master->div));
 }
 
 static int clk_sama7g5_master_determine_rate(struct clk_hw *hw,
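
The new master_div set_rate path above picks the divider by rounding parent_rate/rate to the nearest integer, then searching the zero-terminated divisor table for an exact match and failing otherwise. A stand-alone sketch of that lookup (the table contents are an example):

    #include <stdio.h>

    #define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))
    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static const unsigned int divisors[5] = { 1, 2, 4, 3, 0 };  /* 0 terminates */

    /* Returns the register index for the requested rate, or -1 like -EINVAL. */
    static int pick_div(unsigned long parent_rate, unsigned long rate)
    {
            unsigned int div = DIV_ROUND_CLOSEST(parent_rate, rate);
            unsigned int i;

            for (i = 0; i < ARRAY_SIZE(divisors); i++) {
                    if (!divisors[i])
                            break;          /* end of table: no match */
                    if (div == divisors[i])
                            return i;       /* exact divisor found */
            }
            return -1;
    }

    int main(void)
    {
            printf("%d\n", pick_div(133333333, 44444444));  /* /3  -> index 3 */
            printf("%d\n", pick_div(133333333, 10000000));  /* /13 -> -1 */
            return 0;
    }
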
index 78f458a..34e3ab1 100644 (file)
@@ -229,6 +229,57 @@ static int sam9x60_frac_pll_set_rate(struct clk_hw *hw, unsigned long rate,
        return sam9x60_frac_pll_compute_mul_frac(core, rate, parent_rate, true);
 }
 
+static int sam9x60_frac_pll_set_rate_chg(struct clk_hw *hw, unsigned long rate,
+                                        unsigned long parent_rate)
+{
+       struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw);
+       struct sam9x60_frac *frac = to_sam9x60_frac(core);
+       struct regmap *regmap = core->regmap;
+       unsigned long irqflags;
+       unsigned int val, cfrac, cmul;
+       long ret;
+
+       ret = sam9x60_frac_pll_compute_mul_frac(core, rate, parent_rate, true);
+       if (ret <= 0)
+               return ret;
+
+       spin_lock_irqsave(core->lock, irqflags);
+
+       regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, AT91_PMC_PLL_UPDT_ID_MSK,
+                          core->id);
+       regmap_read(regmap, AT91_PMC_PLL_CTRL1, &val);
+       cmul = (val & core->layout->mul_mask) >> core->layout->mul_shift;
+       cfrac = (val & core->layout->frac_mask) >> core->layout->frac_shift;
+
+       if (cmul == frac->mul && cfrac == frac->frac)
+               goto unlock;
+
+       regmap_write(regmap, AT91_PMC_PLL_CTRL1,
+                    (frac->mul << core->layout->mul_shift) |
+                    (frac->frac << core->layout->frac_shift));
+
+       regmap_update_bits(regmap, AT91_PMC_PLL_UPDT,
+                          AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK,
+                          AT91_PMC_PLL_UPDT_UPDATE | core->id);
+
+       regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0,
+                          AT91_PMC_PLL_CTRL0_ENLOCK | AT91_PMC_PLL_CTRL0_ENPLL,
+                          AT91_PMC_PLL_CTRL0_ENLOCK |
+                          AT91_PMC_PLL_CTRL0_ENPLL);
+
+       regmap_update_bits(regmap, AT91_PMC_PLL_UPDT,
+                          AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK,
+                          AT91_PMC_PLL_UPDT_UPDATE | core->id);
+
+       while (!sam9x60_pll_ready(regmap, core->id))
+               cpu_relax();
+
+unlock:
+       spin_unlock_irqrestore(core->lock, irqflags);
+
+       return ret;
+}
+
 static const struct clk_ops sam9x60_frac_pll_ops = {
        .prepare = sam9x60_frac_pll_prepare,
        .unprepare = sam9x60_frac_pll_unprepare,
@@ -238,6 +289,15 @@ static const struct clk_ops sam9x60_frac_pll_ops = {
        .set_rate = sam9x60_frac_pll_set_rate,
 };
 
+static const struct clk_ops sam9x60_frac_pll_ops_chg = {
+       .prepare = sam9x60_frac_pll_prepare,
+       .unprepare = sam9x60_frac_pll_unprepare,
+       .is_prepared = sam9x60_frac_pll_is_prepared,
+       .recalc_rate = sam9x60_frac_pll_recalc_rate,
+       .round_rate = sam9x60_frac_pll_round_rate,
+       .set_rate = sam9x60_frac_pll_set_rate_chg,
+};
+
 static int sam9x60_div_pll_prepare(struct clk_hw *hw)
 {
        struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw);
@@ -384,6 +444,44 @@ static int sam9x60_div_pll_set_rate(struct clk_hw *hw, unsigned long rate,
        return 0;
 }
 
+static int sam9x60_div_pll_set_rate_chg(struct clk_hw *hw, unsigned long rate,
+                                       unsigned long parent_rate)
+{
+       struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw);
+       struct sam9x60_div *div = to_sam9x60_div(core);
+       struct regmap *regmap = core->regmap;
+       unsigned long irqflags;
+       unsigned int val, cdiv;
+
+       div->div = DIV_ROUND_CLOSEST(parent_rate, rate) - 1;
+
+       spin_lock_irqsave(core->lock, irqflags);
+       regmap_update_bits(regmap, AT91_PMC_PLL_UPDT, AT91_PMC_PLL_UPDT_ID_MSK,
+                          core->id);
+       regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val);
+       cdiv = (val & core->layout->div_mask) >> core->layout->div_shift;
+
+       /* Stop if nothing changed. */
+       if (cdiv == div->div)
+               goto unlock;
+
+       regmap_update_bits(regmap, AT91_PMC_PLL_CTRL0,
+                          core->layout->div_mask,
+                          (div->div << core->layout->div_shift));
+
+       regmap_update_bits(regmap, AT91_PMC_PLL_UPDT,
+                          AT91_PMC_PLL_UPDT_UPDATE | AT91_PMC_PLL_UPDT_ID_MSK,
+                          AT91_PMC_PLL_UPDT_UPDATE | core->id);
+
+       while (!sam9x60_pll_ready(regmap, core->id))
+               cpu_relax();
+
+unlock:
+       spin_unlock_irqrestore(core->lock, irqflags);
+
+       return 0;
+}
+
 static const struct clk_ops sam9x60_div_pll_ops = {
        .prepare = sam9x60_div_pll_prepare,
        .unprepare = sam9x60_div_pll_unprepare,
@@ -393,17 +491,26 @@ static const struct clk_ops sam9x60_div_pll_ops = {
        .set_rate = sam9x60_div_pll_set_rate,
 };
 
+static const struct clk_ops sam9x60_div_pll_ops_chg = {
+       .prepare = sam9x60_div_pll_prepare,
+       .unprepare = sam9x60_div_pll_unprepare,
+       .is_prepared = sam9x60_div_pll_is_prepared,
+       .recalc_rate = sam9x60_div_pll_recalc_rate,
+       .round_rate = sam9x60_div_pll_round_rate,
+       .set_rate = sam9x60_div_pll_set_rate_chg,
+};
+
 struct clk_hw * __init
 sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock,
                              const char *name, const char *parent_name,
                              struct clk_hw *parent_hw, u8 id,
                              const struct clk_pll_characteristics *characteristics,
-                             const struct clk_pll_layout *layout, bool critical)
+                             const struct clk_pll_layout *layout, u32 flags)
 {
        struct sam9x60_frac *frac;
        struct clk_hw *hw;
        struct clk_init_data init;
-       unsigned long parent_rate, flags;
+       unsigned long parent_rate, irqflags;
        unsigned int val;
        int ret;
 
@@ -417,10 +524,12 @@ sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock,
        init.name = name;
        init.parent_names = &parent_name;
        init.num_parents = 1;
-       init.ops = &sam9x60_frac_pll_ops;
-       init.flags = CLK_SET_RATE_GATE;
-       if (critical)
-               init.flags |= CLK_IS_CRITICAL;
+       if (flags & CLK_SET_RATE_GATE)
+               init.ops = &sam9x60_frac_pll_ops;
+       else
+               init.ops = &sam9x60_frac_pll_ops_chg;
+
+       init.flags = flags;
 
        frac->core.id = id;
        frac->core.hw.init = &init;
@@ -429,7 +538,7 @@ sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock,
        frac->core.regmap = regmap;
        frac->core.lock = lock;
 
-       spin_lock_irqsave(frac->core.lock, flags);
+       spin_lock_irqsave(frac->core.lock, irqflags);
        if (sam9x60_pll_ready(regmap, id)) {
                regmap_update_bits(regmap, AT91_PMC_PLL_UPDT,
                                   AT91_PMC_PLL_UPDT_ID_MSK, id);
@@ -457,7 +566,7 @@ sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock,
                        goto free;
                }
        }
-       spin_unlock_irqrestore(frac->core.lock, flags);
+       spin_unlock_irqrestore(frac->core.lock, irqflags);
 
        hw = &frac->core.hw;
        ret = clk_hw_register(NULL, hw);
@@ -469,7 +578,7 @@ sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock,
        return hw;
 
 free:
-       spin_unlock_irqrestore(frac->core.lock, flags);
+       spin_unlock_irqrestore(frac->core.lock, irqflags);
        kfree(frac);
        return hw;
 }
@@ -478,12 +587,12 @@ struct clk_hw * __init
 sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock,
                             const char *name, const char *parent_name, u8 id,
                             const struct clk_pll_characteristics *characteristics,
-                            const struct clk_pll_layout *layout, bool critical)
+                            const struct clk_pll_layout *layout, u32 flags)
 {
        struct sam9x60_div *div;
        struct clk_hw *hw;
        struct clk_init_data init;
-       unsigned long flags;
+       unsigned long irqflags;
        unsigned int val;
        int ret;
 
@@ -497,11 +606,11 @@ sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock,
        init.name = name;
        init.parent_names = &parent_name;
        init.num_parents = 1;
-       init.ops = &sam9x60_div_pll_ops;
-       init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE |
-                    CLK_SET_RATE_PARENT;
-       if (critical)
-               init.flags |= CLK_IS_CRITICAL;
+       if (flags & CLK_SET_RATE_GATE)
+               init.ops = &sam9x60_div_pll_ops;
+       else
+               init.ops = &sam9x60_div_pll_ops_chg;
+       init.flags = flags;
 
        div->core.id = id;
        div->core.hw.init = &init;
@@ -510,14 +619,14 @@ sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock,
        div->core.regmap = regmap;
        div->core.lock = lock;
 
-       spin_lock_irqsave(div->core.lock, flags);
+       spin_lock_irqsave(div->core.lock, irqflags);
 
        regmap_update_bits(regmap, AT91_PMC_PLL_UPDT,
                           AT91_PMC_PLL_UPDT_ID_MSK, id);
        regmap_read(regmap, AT91_PMC_PLL_CTRL0, &val);
        div->div = FIELD_GET(PMC_PLL_CTRL0_DIV_MSK, val);
 
-       spin_unlock_irqrestore(div->core.lock, flags);
+       spin_unlock_irqrestore(div->core.lock, irqflags);
 
        hw = &div->core.hw;
        ret = clk_hw_register(NULL, hw);
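
Across these PLL and master changes, the old bool critical parameter becomes a flags word, and the registration helpers pick between a "gated" ops table (rate changes only while the clock is off) and an on-the-fly "_chg" ops table based on CLK_SET_RATE_GATE. A compact sketch of that dispatch; the names are invented and only the selection logic mirrors the driver:

    #include <stdio.h>

    #define CLK_SET_RATE_GATE (1U << 0)  /* stand-in for the CCF flag */

    struct ops {
            const char *how;
    };

    static const struct ops gated_ops = { "reprogram only while the clock is gated" };
    static const struct ops chg_ops   = { "reprogram on the fly, then poll for lock" };

    static const struct ops *pick_ops(unsigned int flags)
    {
            /* Same decision as at91_clk_register_master_pres()/_div() above. */
            return (flags & CLK_SET_RATE_GATE) ? &gated_ops : &chg_ops;
    }

    int main(void)
    {
            printf("%s\n", pick_ops(CLK_SET_RATE_GATE)->how);
            printf("%s\n", pick_ops(0)->how);
            return 0;
    }
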
index a50084d..a97b99c 100644 (file)
@@ -24,6 +24,8 @@
 
 #define GCK_INDEX_DT_AUDIO_PLL 5
 
+static DEFINE_SPINLOCK(mck_lock);
+
 #ifdef CONFIG_HAVE_AT91_AUDIO_PLL
 static void __init of_sama5d2_clk_audio_pll_frac_setup(struct device_node *np)
 {
@@ -388,9 +390,16 @@ of_at91_clk_master_setup(struct device_node *np,
        if (IS_ERR(regmap))
                return;
 
-       hw = at91_clk_register_master(regmap, name, num_parents,
-                                     parent_names, layout,
-                                     characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", num_parents,
+                                          parent_names, layout,
+                                          characteristics, &mck_lock,
+                                          CLK_SET_RATE_GATE, INT_MIN);
+       if (IS_ERR(hw))
+               goto out_free_characteristics;
+
+       hw = at91_clk_register_master_div(regmap, name, "masterck_pres",
+                                         layout, characteristics,
+                                         &mck_lock, CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto out_free_characteristics;
 
index 7b86aff..a49076c 100644 (file)
@@ -48,7 +48,7 @@ extern const struct clk_master_layout at91sam9x5_master_layout;
 
 struct clk_master_characteristics {
        struct clk_range output;
-       u32 divisors[4];
+       u32 divisors[5];
        u8 have_div3_pres;
 };
 
@@ -155,10 +155,18 @@ at91_clk_register_sam9x5_main(struct regmap *regmap, const char *name,
                              const char **parent_names, int num_parents);
 
 struct clk_hw * __init
-at91_clk_register_master(struct regmap *regmap, const char *name,
-                        int num_parents, const char **parent_names,
-                        const struct clk_master_layout *layout,
-                        const struct clk_master_characteristics *characteristics);
+at91_clk_register_master_pres(struct regmap *regmap, const char *name,
+                             int num_parents, const char **parent_names,
+                             const struct clk_master_layout *layout,
+                             const struct clk_master_characteristics *characteristics,
+                             spinlock_t *lock, u32 flags, int chg_pid);
+
+struct clk_hw * __init
+at91_clk_register_master_div(struct regmap *regmap, const char *name,
+                            const char *parent_names,
+                            const struct clk_master_layout *layout,
+                            const struct clk_master_characteristics *characteristics,
+                            spinlock_t *lock, u32 flags);
 
 struct clk_hw * __init
 at91_clk_sama7g5_register_master(struct regmap *regmap,
@@ -190,14 +198,14 @@ struct clk_hw * __init
 sam9x60_clk_register_div_pll(struct regmap *regmap, spinlock_t *lock,
                             const char *name, const char *parent_name, u8 id,
                             const struct clk_pll_characteristics *characteristics,
-                            const struct clk_pll_layout *layout, bool critical);
+                            const struct clk_pll_layout *layout, u32 flags);
 
 struct clk_hw * __init
 sam9x60_clk_register_frac_pll(struct regmap *regmap, spinlock_t *lock,
                              const char *name, const char *parent_name,
                              struct clk_hw *parent_hw, u8 id,
                              const struct clk_pll_characteristics *characteristics,
-                             const struct clk_pll_layout *layout, bool critical);
+                             const struct clk_pll_layout *layout, u32 flags);
 
 struct clk_hw * __init
 at91_clk_register_programmable(struct regmap *regmap, const char *name,
index 3c4c956..5f6fa89 100644 (file)
@@ -8,6 +8,7 @@
 #include "pmc.h"
 
 static DEFINE_SPINLOCK(pmc_pll_lock);
+static DEFINE_SPINLOCK(mck_lock);
 
 static const struct clk_master_characteristics mck_characteristics = {
        .output = { .min = 140000000, .max = 200000000 },
@@ -76,11 +77,11 @@ static const struct {
        char *p;
        u8 id;
 } sam9x60_systemck[] = {
-       { .n = "ddrck",  .p = "masterck", .id = 2 },
+       { .n = "ddrck",  .p = "masterck_div", .id = 2 },
        { .n = "uhpck",  .p = "usbck",    .id = 6 },
        { .n = "pck0",   .p = "prog0",    .id = 8 },
        { .n = "pck1",   .p = "prog1",    .id = 9 },
-       { .n = "qspick", .p = "masterck", .id = 19 },
+       { .n = "qspick", .p = "masterck_div", .id = 19 },
 };
 
 static const struct {
@@ -174,7 +175,6 @@ static void __init sam9x60_pmc_setup(struct device_node *np)
        struct regmap *regmap;
        struct clk_hw *hw;
        int i;
-       bool bypass;
 
        i = of_property_match_string(np, "clock-names", "td_slck");
        if (i < 0)
@@ -209,10 +209,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np)
        if (IS_ERR(hw))
                goto err_free;
 
-       bypass = of_property_read_bool(np, "atmel,osc-bypass");
-
-       hw = at91_clk_register_main_osc(regmap, "main_osc", mainxtal_name,
-                                       bypass);
+       hw = at91_clk_register_main_osc(regmap, "main_osc", mainxtal_name, 0);
        if (IS_ERR(hw))
                goto err_free;
        main_osc_hw = hw;
@@ -228,13 +225,24 @@ static void __init sam9x60_pmc_setup(struct device_node *np)
        hw = sam9x60_clk_register_frac_pll(regmap, &pmc_pll_lock, "pllack_fracck",
                                           "mainck", sam9x60_pmc->chws[PMC_MAIN],
                                           0, &plla_characteristics,
-                                          &pll_frac_layout, true);
+                                          &pll_frac_layout,
+                                          /*
+                                           * This feeds pllack_divck which
+                                           * feeds CPU. It should not be
+                                           * disabled.
+                                           */
+                                          CLK_IS_CRITICAL | CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
        hw = sam9x60_clk_register_div_pll(regmap, &pmc_pll_lock, "pllack_divck",
                                          "pllack_fracck", 0, &plla_characteristics,
-                                         &pll_div_layout, true);
+                                         &pll_div_layout,
+                                          /*
+                                           * This feeds CPU. It should not
+                                           * be disabled.
+                                           */
+                                         CLK_IS_CRITICAL | CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
@@ -243,13 +251,16 @@ static void __init sam9x60_pmc_setup(struct device_node *np)
        hw = sam9x60_clk_register_frac_pll(regmap, &pmc_pll_lock, "upllck_fracck",
                                           "main_osc", main_osc_hw, 1,
                                           &upll_characteristics,
-                                          &pll_frac_layout, false);
+                                          &pll_frac_layout, CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
        hw = sam9x60_clk_register_div_pll(regmap, &pmc_pll_lock, "upllck_divck",
                                          "upllck_fracck", 1, &upll_characteristics,
-                                         &pll_div_layout, false);
+                                         &pll_div_layout,
+                                         CLK_SET_RATE_GATE |
+                                         CLK_SET_PARENT_GATE |
+                                         CLK_SET_RATE_PARENT);
        if (IS_ERR(hw))
                goto err_free;
 
@@ -258,9 +269,17 @@ static void __init sam9x60_pmc_setup(struct device_node *np)
        parent_names[0] = md_slck_name;
        parent_names[1] = "mainck";
        parent_names[2] = "pllack_divck";
-       hw = at91_clk_register_master(regmap, "masterck", 3, parent_names,
-                                     &sam9x60_master_layout,
-                                     &mck_characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", 3,
+                                          parent_names, &sam9x60_master_layout,
+                                          &mck_characteristics, &mck_lock,
+                                          CLK_SET_RATE_GATE, INT_MIN);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       hw = at91_clk_register_master_div(regmap, "masterck_div",
+                                         "masterck_pres", &sam9x60_master_layout,
+                                         &mck_characteristics, &mck_lock,
+                                         CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
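
The master clock is now registered in two stages that share one spinlock, since the prescaler and the divider are fields of the same PMC master clock register. A minimal sketch of the pattern, using the helper signatures visible in this diff (the foo_* wrapper is hypothetical):

static DEFINE_SPINLOCK(foo_mck_lock);

static struct clk_hw * __init foo_register_mck(struct regmap *regmap,
					       const char **parent_names)
{
	struct clk_hw *hw;

	/* Stage 1: source mux + prescaler; only the divider consumes it. */
	hw = at91_clk_register_master_pres(regmap, "masterck_pres", 3,
					   parent_names,
					   &sam9x60_master_layout,
					   &mck_characteristics, &foo_mck_lock,
					   CLK_SET_RATE_GATE, INT_MIN);
	if (IS_ERR(hw))
		return hw;

	/* Stage 2: divider; peripherals now reference "masterck_div". */
	return at91_clk_register_master_div(regmap, "masterck_div",
					    "masterck_pres",
					    &sam9x60_master_layout,
					    &mck_characteristics,
					    &foo_mck_lock, CLK_SET_RATE_GATE);
}
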
@@ -276,7 +295,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np)
        parent_names[0] = md_slck_name;
        parent_names[1] = td_slck_name;
        parent_names[2] = "mainck";
-       parent_names[3] = "masterck";
+       parent_names[3] = "masterck_div";
        parent_names[4] = "pllack_divck";
        parent_names[5] = "upllck_divck";
        for (i = 0; i < 2; i++) {
@@ -308,7 +327,7 @@ static void __init sam9x60_pmc_setup(struct device_node *np)
                hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock,
                                                         &sam9x60_pcr_layout,
                                                         sam9x60_periphck[i].n,
-                                                        "masterck",
+                                                        "masterck_div",
                                                         sam9x60_periphck[i].id,
                                                         &range, INT_MIN);
                if (IS_ERR(hw))
index 8b22076..9a5cbc7 100644 (file)
@@ -7,6 +7,8 @@
 
 #include "pmc.h"
 
+static DEFINE_SPINLOCK(mck_lock);
+
 static const struct clk_master_characteristics mck_characteristics = {
        .output = { .min = 124000000, .max = 166000000 },
        .divisors = { 1, 2, 4, 3 },
@@ -40,14 +42,14 @@ static const struct {
        char *p;
        u8 id;
 } sama5d2_systemck[] = {
-       { .n = "ddrck", .p = "masterck", .id = 2 },
-       { .n = "lcdck", .p = "masterck", .id = 3 },
-       { .n = "uhpck", .p = "usbck",    .id = 6 },
-       { .n = "udpck", .p = "usbck",    .id = 7 },
-       { .n = "pck0",  .p = "prog0",    .id = 8 },
-       { .n = "pck1",  .p = "prog1",    .id = 9 },
-       { .n = "pck2",  .p = "prog2",    .id = 10 },
-       { .n = "iscck", .p = "masterck", .id = 18 },
+       { .n = "ddrck", .p = "masterck_div", .id = 2 },
+       { .n = "lcdck", .p = "masterck_div", .id = 3 },
+       { .n = "uhpck", .p = "usbck",        .id = 6 },
+       { .n = "udpck", .p = "usbck",        .id = 7 },
+       { .n = "pck0",  .p = "prog0",        .id = 8 },
+       { .n = "pck1",  .p = "prog1",        .id = 9 },
+       { .n = "pck2",  .p = "prog2",        .id = 10 },
+       { .n = "iscck", .p = "masterck_div", .id = 18 },
 };
 
 static const struct {
@@ -235,15 +237,25 @@ static void __init sama5d2_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       hw = at91_clk_register_master(regmap, "masterck", 4, parent_names,
-                                     &at91sam9x5_master_layout,
-                                     &mck_characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4,
+                                          parent_names,
+                                          &at91sam9x5_master_layout,
+                                          &mck_characteristics, &mck_lock,
+                                          CLK_SET_RATE_GATE, INT_MIN);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       hw = at91_clk_register_master_div(regmap, "masterck_div",
+                                         "masterck_pres",
+                                         &at91sam9x5_master_layout,
+                                         &mck_characteristics, &mck_lock,
+                                         CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
        sama5d2_pmc->chws[PMC_MCK] = hw;
 
-       hw = at91_clk_register_h32mx(regmap, "h32mxck", "masterck");
+       hw = at91_clk_register_h32mx(regmap, "h32mxck", "masterck_div");
        if (IS_ERR(hw))
                goto err_free;
 
@@ -259,7 +271,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       parent_names[4] = "masterck";
+       parent_names[4] = "masterck_div";
        parent_names[5] = "audiopll_pmcck";
        for (i = 0; i < 3; i++) {
                char name[6];
@@ -290,7 +302,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np)
                hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock,
                                                         &sama5d2_pcr_layout,
                                                         sama5d2_periphck[i].n,
-                                                        "masterck",
+                                                        "masterck_div",
                                                         sama5d2_periphck[i].id,
                                                         &range, INT_MIN);
                if (IS_ERR(hw))
@@ -317,7 +329,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       parent_names[4] = "masterck";
+       parent_names[4] = "masterck_div";
        parent_names[5] = "audiopll_pmcck";
        for (i = 0; i < ARRAY_SIZE(sama5d2_gck); i++) {
                hw = at91_clk_register_generated(regmap, &pmc_pcr_lock,
index 7c6e0a5..87009ee 100644 (file)
@@ -7,6 +7,8 @@
 
 #include "pmc.h"
 
+static DEFINE_SPINLOCK(mck_lock);
+
 static const struct clk_master_characteristics mck_characteristics = {
        .output = { .min = 0, .max = 166000000 },
        .divisors = { 1, 2, 4, 3 },
@@ -40,14 +42,14 @@ static const struct {
        char *p;
        u8 id;
 } sama5d3_systemck[] = {
-       { .n = "ddrck", .p = "masterck", .id = 2 },
-       { .n = "lcdck", .p = "masterck", .id = 3 },
-       { .n = "smdck", .p = "smdclk",   .id = 4 },
-       { .n = "uhpck", .p = "usbck",    .id = 6 },
-       { .n = "udpck", .p = "usbck",    .id = 7 },
-       { .n = "pck0",  .p = "prog0",    .id = 8 },
-       { .n = "pck1",  .p = "prog1",    .id = 9 },
-       { .n = "pck2",  .p = "prog2",    .id = 10 },
+       { .n = "ddrck", .p = "masterck_div", .id = 2 },
+       { .n = "lcdck", .p = "masterck_div", .id = 3 },
+       { .n = "smdck", .p = "smdclk",       .id = 4 },
+       { .n = "uhpck", .p = "usbck",        .id = 6 },
+       { .n = "udpck", .p = "usbck",        .id = 7 },
+       { .n = "pck0",  .p = "prog0",        .id = 8 },
+       { .n = "pck1",  .p = "prog1",        .id = 9 },
+       { .n = "pck2",  .p = "prog2",        .id = 10 },
 };
 
 static const struct {
@@ -170,9 +172,19 @@ static void __init sama5d3_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       hw = at91_clk_register_master(regmap, "masterck", 4, parent_names,
-                                     &at91sam9x5_master_layout,
-                                     &mck_characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4,
+                                          parent_names,
+                                          &at91sam9x5_master_layout,
+                                          &mck_characteristics, &mck_lock,
+                                          CLK_SET_RATE_GATE, INT_MIN);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       hw = at91_clk_register_master_div(regmap, "masterck_div",
+                                         "masterck_pres",
+                                         &at91sam9x5_master_layout,
+                                         &mck_characteristics, &mck_lock,
+                                         CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
@@ -192,7 +204,7 @@ static void __init sama5d3_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       parent_names[4] = "masterck";
+       parent_names[4] = "masterck_div";
        for (i = 0; i < 3; i++) {
                char name[6];
 
@@ -222,7 +234,7 @@ static void __init sama5d3_pmc_setup(struct device_node *np)
                hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock,
                                                         &sama5d3_pcr_layout,
                                                         sama5d3_periphck[i].n,
-                                                        "masterck",
+                                                        "masterck_div",
                                                         sama5d3_periphck[i].id,
                                                         &sama5d3_periphck[i].r,
                                                         INT_MIN);
index 92d8d41..57fff79 100644 (file)
@@ -7,6 +7,8 @@
 
 #include "pmc.h"
 
+static DEFINE_SPINLOCK(mck_lock);
+
 static const struct clk_master_characteristics mck_characteristics = {
        .output = { .min = 125000000, .max = 200000000 },
        .divisors = { 1, 2, 4, 3 },
@@ -39,14 +41,14 @@ static const struct {
        char *p;
        u8 id;
 } sama5d4_systemck[] = {
-       { .n = "ddrck", .p = "masterck", .id = 2 },
-       { .n = "lcdck", .p = "masterck", .id = 3 },
-       { .n = "smdck", .p = "smdclk",   .id = 4 },
-       { .n = "uhpck", .p = "usbck",    .id = 6 },
-       { .n = "udpck", .p = "usbck",    .id = 7 },
-       { .n = "pck0",  .p = "prog0",    .id = 8 },
-       { .n = "pck1",  .p = "prog1",    .id = 9 },
-       { .n = "pck2",  .p = "prog2",    .id = 10 },
+       { .n = "ddrck", .p = "masterck_div", .id = 2 },
+       { .n = "lcdck", .p = "masterck_div", .id = 3 },
+       { .n = "smdck", .p = "smdclk",       .id = 4 },
+       { .n = "uhpck", .p = "usbck",        .id = 6 },
+       { .n = "udpck", .p = "usbck",        .id = 7 },
+       { .n = "pck0",  .p = "prog0",        .id = 8 },
+       { .n = "pck1",  .p = "prog1",        .id = 9 },
+       { .n = "pck2",  .p = "prog2",        .id = 10 },
 };
 
 static const struct {
@@ -185,15 +187,25 @@ static void __init sama5d4_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       hw = at91_clk_register_master(regmap, "masterck", 4, parent_names,
-                                     &at91sam9x5_master_layout,
-                                     &mck_characteristics);
+       hw = at91_clk_register_master_pres(regmap, "masterck_pres", 4,
+                                          parent_names,
+                                          &at91sam9x5_master_layout,
+                                          &mck_characteristics, &mck_lock,
+                                          CLK_SET_RATE_GATE, INT_MIN);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       hw = at91_clk_register_master_div(regmap, "masterck_div",
+                                         "masterck_pres",
+                                         &at91sam9x5_master_layout,
+                                         &mck_characteristics, &mck_lock,
+                                         CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                goto err_free;
 
        sama5d4_pmc->chws[PMC_MCK] = hw;
 
-       hw = at91_clk_register_h32mx(regmap, "h32mxck", "masterck");
+       hw = at91_clk_register_h32mx(regmap, "h32mxck", "masterck_div");
        if (IS_ERR(hw))
                goto err_free;
 
@@ -215,7 +227,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np)
        parent_names[1] = "mainck";
        parent_names[2] = "plladivck";
        parent_names[3] = "utmick";
-       parent_names[4] = "masterck";
+       parent_names[4] = "masterck_div";
        for (i = 0; i < 3; i++) {
                char name[6];
 
@@ -245,7 +257,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np)
                hw = at91_clk_register_sam9x5_peripheral(regmap, &pmc_pcr_lock,
                                                         &sama5d4_pcr_layout,
                                                         sama5d4_periphck[i].n,
-                                                        "masterck",
+                                                        "masterck_div",
                                                         sama5d4_periphck[i].id,
                                                         &range, INT_MIN);
                if (IS_ERR(hw))
index 0db2ab3..a6e20b3 100644 (file)
@@ -32,6 +32,7 @@
        } while (0)
 
 static DEFINE_SPINLOCK(pmc_pll_lock);
+static DEFINE_SPINLOCK(pmc_mck0_lock);
 static DEFINE_SPINLOCK(pmc_mckX_lock);
 
 /**
@@ -89,118 +90,198 @@ static const struct clk_pll_layout pll_layout_divio = {
        .endiv_shift    = 30,
 };
 
+/*
+ * CPU PLL output range.
+ * Note: the upper limit is set to 1000000002 because the hardware
+ * block cannot output exactly 1 GHz.
+ */
+static const struct clk_range cpu_pll_outputs[] = {
+       { .min = 2343750, .max = 1000000002 },
+};
+
+/* PLL output range. */
+static const struct clk_range pll_outputs[] = {
+       { .min = 2343750, .max = 1200000000 },
+};
+
+/* CPU PLL characteristics. */
+static const struct clk_pll_characteristics cpu_pll_characteristics = {
+       .input = { .min = 12000000, .max = 50000000 },
+       .num_output = ARRAY_SIZE(cpu_pll_outputs),
+       .output = cpu_pll_outputs,
+};
+
+/* PLL characteristics. */
+static const struct clk_pll_characteristics pll_characteristics = {
+       .input = { .min = 12000000, .max = 50000000 },
+       .num_output = ARRAY_SIZE(pll_outputs),
+       .output = pll_outputs,
+};
+
 /**
  * PLL clocks description
  * @n:         clock name
  * @p:         clock parent
  * @l:         clock layout
+ * @c:         clock characteristics
  * @t:         clock type
- * @f:         true if clock is critical and cannot be disabled
+ * @f:         clock flags
  * @eid:       export index in sama7g5->chws[] array
  */
 static const struct {
        const char *n;
        const char *p;
        const struct clk_pll_layout *l;
+       const struct clk_pll_characteristics *c;
+       unsigned long f;
        u8 t;
-       u8 c;
        u8 eid;
 } sama7g5_plls[][PLL_ID_MAX] = {
        [PLL_ID_CPU] = {
                { .n = "cpupll_fracck",
                  .p = "mainck",
                  .l = &pll_layout_frac,
+                 .c = &cpu_pll_characteristics,
                  .t = PLL_TYPE_FRAC,
-                 .c = 1, },
+                  /*
+                   * This feeds cpupll_divpmcck which feeds the CPU. It should
+                   * not be disabled.
+                   */
+                 .f = CLK_IS_CRITICAL, },
 
                { .n = "cpupll_divpmcck",
                  .p = "cpupll_fracck",
                  .l = &pll_layout_divpmc,
+                 .c = &cpu_pll_characteristics,
                  .t = PLL_TYPE_DIV,
-                 .c = 1, },
+                  /* This feeds the CPU. It should not be disabled. */
+                 .f = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT,
+                 .eid = PMC_CPUPLL, },
        },
 
        [PLL_ID_SYS] = {
                { .n = "syspll_fracck",
                  .p = "mainck",
                  .l = &pll_layout_frac,
+                 .c = &pll_characteristics,
                  .t = PLL_TYPE_FRAC,
-                 .c = 1, },
+                  /*
+                   * This feeds syspll_divpmcck which may feed critical parts
+                   * of the system, such as timers. Therefore it should not be
+                   * disabled.
+                   */
+                 .f = CLK_IS_CRITICAL | CLK_SET_RATE_GATE, },
 
                { .n = "syspll_divpmcck",
                  .p = "syspll_fracck",
                  .l = &pll_layout_divpmc,
+                 .c = &pll_characteristics,
                  .t = PLL_TYPE_DIV,
-                 .c = 1, },
+                  /*
+                   * This may feed critical parts of the system, such as timers.
+                   * Therefore it should not be disabled.
+                   */
+                 .f = CLK_IS_CRITICAL | CLK_SET_RATE_GATE,
+                 .eid = PMC_SYSPLL, },
        },
 
        [PLL_ID_DDR] = {
                { .n = "ddrpll_fracck",
                  .p = "mainck",
                  .l = &pll_layout_frac,
+                 .c = &pll_characteristics,
                  .t = PLL_TYPE_FRAC,
-                 .c = 1, },
+                  /*
+                   * This feeds ddrpll_divpmcck which feeds DDR. It should not
+                   * be disabled.
+                   */
+                 .f = CLK_IS_CRITICAL | CLK_SET_RATE_GATE, },
 
                { .n = "ddrpll_divpmcck",
                  .p = "ddrpll_fracck",
                  .l = &pll_layout_divpmc,
+                 .c = &pll_characteristics,
                  .t = PLL_TYPE_DIV,
-                 .c = 1, },
+                  /* This feeds DDR. It should not be disabled. */
+                 .f = CLK_IS_CRITICAL | CLK_SET_RATE_GATE, },
        },
 
        [PLL_ID_IMG] = {
                { .n = "imgpll_fracck",
                  .p = "mainck",
                  .l = &pll_layout_frac,
-                 .t = PLL_TYPE_FRAC, },
+                 .c = &pll_characteristics,
+                 .t = PLL_TYPE_FRAC,
+                 .f = CLK_SET_RATE_GATE, },
 
                { .n = "imgpll_divpmcck",
                  .p = "imgpll_fracck",
                  .l = &pll_layout_divpmc,
-                 .t = PLL_TYPE_DIV, },
+                 .c = &pll_characteristics,
+                 .t = PLL_TYPE_DIV,
+                 .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE |
+                      CLK_SET_RATE_PARENT, },
        },
 
        [PLL_ID_BAUD] = {
                { .n = "baudpll_fracck",
                  .p = "mainck",
                  .l = &pll_layout_frac,
-                 .t = PLL_TYPE_FRAC, },
+                 .c = &pll_characteristics,
+                 .t = PLL_TYPE_FRAC,
+                 .f = CLK_SET_RATE_GATE, },
 
                { .n = "baudpll_divpmcck",
                  .p = "baudpll_fracck",
                  .l = &pll_layout_divpmc,
-                 .t = PLL_TYPE_DIV, },
+                 .c = &pll_characteristics,
+                 .t = PLL_TYPE_DIV,
+                 .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE |
+                      CLK_SET_RATE_PARENT, },
        },
 
        [PLL_ID_AUDIO] = {
                { .n = "audiopll_fracck",
                  .p = "main_xtal",
                  .l = &pll_layout_frac,
-                 .t = PLL_TYPE_FRAC, },
+                 .c = &pll_characteristics,
+                 .t = PLL_TYPE_FRAC,
+                 .f = CLK_SET_RATE_GATE, },
 
                { .n = "audiopll_divpmcck",
                  .p = "audiopll_fracck",
                  .l = &pll_layout_divpmc,
+                 .c = &pll_characteristics,
                  .t = PLL_TYPE_DIV,
-                 .eid = PMC_I2S0_MUX, },
+                 .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE |
+                      CLK_SET_RATE_PARENT,
+                 .eid = PMC_AUDIOPMCPLL, },
 
                { .n = "audiopll_diviock",
                  .p = "audiopll_fracck",
                  .l = &pll_layout_divio,
+                 .c = &pll_characteristics,
                  .t = PLL_TYPE_DIV,
-                 .eid = PMC_I2S1_MUX, },
+                 .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE |
+                      CLK_SET_RATE_PARENT,
+                 .eid = PMC_AUDIOIOPLL, },
        },
 
        [PLL_ID_ETH] = {
                { .n = "ethpll_fracck",
                  .p = "main_xtal",
                  .l = &pll_layout_frac,
-                 .t = PLL_TYPE_FRAC, },
+                 .c = &pll_characteristics,
+                 .t = PLL_TYPE_FRAC,
+                 .f = CLK_SET_RATE_GATE, },
 
                { .n = "ethpll_divpmcck",
                  .p = "ethpll_fracck",
                  .l = &pll_layout_divpmc,
-                 .t = PLL_TYPE_DIV, },
+                 .c = &pll_characteristics,
+                 .t = PLL_TYPE_DIV,
+                 .f = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE |
+                      CLK_SET_RATE_PARENT, },
        },
 };
 
@@ -245,7 +326,7 @@ static const struct {
          .ep = { "syspll_divpmcck", "ddrpll_divpmcck", "imgpll_divpmcck", },
          .ep_mux_table = { 5, 6, 7, },
          .ep_count = 3,
-         .ep_chg_id = 6, },
+         .ep_chg_id = 5, },
 
        { .n = "mck4",
          .id = 4,
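
A note on the recurring off-by-one changes in this file, summarized from the parent-table hunks further down:

/*
 * Why every *_chg_id drops by one: "mck0" is removed from the shared
 * parent prefix later in this patch, shrinking the common parents from
 * { md_slck, td_slck, mainck, mck0 } to three entries. An endpoint
 * parent formerly at index 4 + k is now at 3 + k, so the recorded
 * changeable-parent index shifts with it (6 -> 5 here, 5 -> 4 and
 * 4 -> 3 in the hunks below). The programmable-clock mux table
 * likewise loses mck0's entry (3).
 */
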
@@ -278,7 +359,7 @@ static const struct {
 };
 
 /* Mux table for programmable clocks. */
-static u32 sama7g5_prog_mux_table[] = { 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, };
+static u32 sama7g5_prog_mux_table[] = { 0, 1, 2, 5, 6, 7, 8, 9, 10, };
 
 /**
  * Peripheral clock description
@@ -401,7 +482,7 @@ static const struct {
          .pp = { "audiopll_divpmcck", },
          .pp_mux_table = { 9, },
          .pp_count = 1,
-         .pp_chg_id = 4, },
+         .pp_chg_id = 3, },
 
        { .n  = "csi_gclk",
          .id = 33,
@@ -513,7 +594,7 @@ static const struct {
          .pp = { "ethpll_divpmcck", },
          .pp_mux_table = { 10, },
          .pp_count = 1,
-         .pp_chg_id = 4, },
+         .pp_chg_id = 3, },
 
        { .n  = "gmac1_gclk",
          .id = 52,
@@ -545,7 +626,7 @@ static const struct {
          .pp = { "syspll_divpmcck", "audiopll_divpmcck", },
          .pp_mux_table = { 5, 9, },
          .pp_count = 2,
-         .pp_chg_id = 5, },
+         .pp_chg_id = 4, },
 
        { .n  = "i2smcc1_gclk",
          .id = 58,
@@ -553,7 +634,7 @@ static const struct {
          .pp = { "syspll_divpmcck", "audiopll_divpmcck", },
          .pp_mux_table = { 5, 9, },
          .pp_count = 2,
-         .pp_chg_id = 5, },
+         .pp_chg_id = 4, },
 
        { .n  = "mcan0_gclk",
          .id = 61,
@@ -695,7 +776,7 @@ static const struct {
          .pp = { "syspll_divpmcck", "baudpll_divpmcck", },
          .pp_mux_table = { 5, 8, },
          .pp_count = 2,
-         .pp_chg_id = 5, },
+         .pp_chg_id = 4, },
 
        { .n  = "sdmmc1_gclk",
          .id = 81,
@@ -703,7 +784,7 @@ static const struct {
          .pp = { "syspll_divpmcck", "baudpll_divpmcck", },
          .pp_mux_table = { 5, 8, },
          .pp_count = 2,
-         .pp_chg_id = 5, },
+         .pp_chg_id = 4, },
 
        { .n  = "sdmmc2_gclk",
          .id = 82,
@@ -711,7 +792,7 @@ static const struct {
          .pp = { "syspll_divpmcck", "baudpll_divpmcck", },
          .pp_mux_table = { 5, 8, },
          .pp_count = 2,
-         .pp_chg_id = 5, },
+         .pp_chg_id = 4, },
 
        { .n  = "spdifrx_gclk",
          .id = 84,
@@ -719,7 +800,7 @@ static const struct {
          .pp = { "syspll_divpmcck", "audiopll_divpmcck", },
          .pp_mux_table = { 5, 9, },
          .pp_count = 2,
-         .pp_chg_id = 5, },
+         .pp_chg_id = 4, },
 
        { .n = "spdiftx_gclk",
          .id = 85,
@@ -727,7 +808,7 @@ static const struct {
          .pp = { "syspll_divpmcck", "audiopll_divpmcck", },
          .pp_mux_table = { 5, 9, },
          .pp_count = 2,
-         .pp_chg_id = 5, },
+         .pp_chg_id = 4, },
 
        { .n  = "tcb0_ch0_gclk",
          .id = 88,
@@ -758,28 +839,16 @@ static const struct {
          .pp_chg_id = INT_MIN, },
 };
 
-/* PLL output range. */
-static const struct clk_range pll_outputs[] = {
-       { .min = 2343750, .max = 1200000000 },
-};
-
-/* PLL characteristics. */
-static const struct clk_pll_characteristics pll_characteristics = {
-       .input = { .min = 12000000, .max = 50000000 },
-       .num_output = ARRAY_SIZE(pll_outputs),
-       .output = pll_outputs,
-};
-
 /* MCK0 characteristics. */
 static const struct clk_master_characteristics mck0_characteristics = {
-       .output = { .min = 140000000, .max = 200000000 },
-       .divisors = { 1, 2, 4, 3 },
+       .output = { .min = 50000000, .max = 200000000 },
+       .divisors = { 1, 2, 4, 3, 5 },
        .have_div3_pres = 1,
 };
 
 /* MCK0 layout. */
 static const struct clk_master_layout mck0_layout = {
-       .mask = 0x373,
+       .mask = 0x773,
        .pres_shift = 4,
        .offset = 0x28,
 };
@@ -835,10 +904,10 @@ static void __init sama7g5_pmc_setup(struct device_node *np)
        if (IS_ERR(regmap))
                return;
 
-       sama7g5_pmc = pmc_data_allocate(PMC_I2S1_MUX + 1,
+       sama7g5_pmc = pmc_data_allocate(PMC_CPU + 1,
                                        nck(sama7g5_systemck),
                                        nck(sama7g5_periphck),
-                                       nck(sama7g5_gck));
+                                       nck(sama7g5_gck), 8);
        if (!sama7g5_pmc)
                return;
 
@@ -886,18 +955,18 @@ static void __init sama7g5_pmc_setup(struct device_node *np)
                                hw = sam9x60_clk_register_frac_pll(regmap,
                                        &pmc_pll_lock, sama7g5_plls[i][j].n,
                                        sama7g5_plls[i][j].p, parent_hw, i,
-                                       &pll_characteristics,
+                                       sama7g5_plls[i][j].c,
                                        sama7g5_plls[i][j].l,
-                                       sama7g5_plls[i][j].c);
+                                       sama7g5_plls[i][j].f);
                                break;
 
                        case PLL_TYPE_DIV:
                                hw = sam9x60_clk_register_div_pll(regmap,
                                        &pmc_pll_lock, sama7g5_plls[i][j].n,
                                        sama7g5_plls[i][j].p, i,
-                                       &pll_characteristics,
+                                       sama7g5_plls[i][j].c,
                                        sama7g5_plls[i][j].l,
-                                       sama7g5_plls[i][j].c);
+                                       sama7g5_plls[i][j].f);
                                break;
 
                        default:
@@ -912,12 +981,19 @@ static void __init sama7g5_pmc_setup(struct device_node *np)
                }
        }
 
-       parent_names[0] = md_slck_name;
-       parent_names[1] = "mainck";
-       parent_names[2] = "cpupll_divpmcck";
-       parent_names[3] = "syspll_divpmcck";
-       hw = at91_clk_register_master(regmap, "mck0", 4, parent_names,
-                                     &mck0_layout, &mck0_characteristics);
+       parent_names[0] = "cpupll_divpmcck";
+       hw = at91_clk_register_master_pres(regmap, "cpuck", 1, parent_names,
+                                          &mck0_layout, &mck0_characteristics,
+                                          &pmc_mck0_lock,
+                                          CLK_SET_RATE_PARENT, 0);
+       if (IS_ERR(hw))
+               goto err_free;
+
+       sama7g5_pmc->chws[PMC_CPU] = hw;
+
+       hw = at91_clk_register_master_div(regmap, "mck0", "cpuck",
+                                         &mck0_layout, &mck0_characteristics,
+                                         &pmc_mck0_lock, 0);
        if (IS_ERR(hw))
                goto err_free;
 
@@ -926,9 +1002,8 @@ static void __init sama7g5_pmc_setup(struct device_node *np)
        parent_names[0] = md_slck_name;
        parent_names[1] = td_slck_name;
        parent_names[2] = "mainck";
-       parent_names[3] = "mck0";
        for (i = 0; i < ARRAY_SIZE(sama7g5_mckx); i++) {
-               u8 num_parents = 4 + sama7g5_mckx[i].ep_count;
+               u8 num_parents = 3 + sama7g5_mckx[i].ep_count;
                u32 *mux_table;
 
                mux_table = kmalloc_array(num_parents, sizeof(*mux_table),
@@ -936,10 +1011,10 @@ static void __init sama7g5_pmc_setup(struct device_node *np)
                if (!mux_table)
                        goto err_free;
 
-               SAMA7G5_INIT_TABLE(mux_table, 4);
-               SAMA7G5_FILL_TABLE(&mux_table[4], sama7g5_mckx[i].ep_mux_table,
+               SAMA7G5_INIT_TABLE(mux_table, 3);
+               SAMA7G5_FILL_TABLE(&mux_table[3], sama7g5_mckx[i].ep_mux_table,
                                   sama7g5_mckx[i].ep_count);
-               SAMA7G5_FILL_TABLE(&parent_names[4], sama7g5_mckx[i].ep,
+               SAMA7G5_FILL_TABLE(&parent_names[3], sama7g5_mckx[i].ep,
                                   sama7g5_mckx[i].ep_count);
 
                hw = at91_clk_sama7g5_register_master(regmap, sama7g5_mckx[i].n,
@@ -962,24 +1037,25 @@ static void __init sama7g5_pmc_setup(struct device_node *np)
        parent_names[0] = md_slck_name;
        parent_names[1] = td_slck_name;
        parent_names[2] = "mainck";
-       parent_names[3] = "mck0";
-       parent_names[4] = "syspll_divpmcck";
-       parent_names[5] = "ddrpll_divpmcck";
-       parent_names[6] = "imgpll_divpmcck";
-       parent_names[7] = "baudpll_divpmcck";
-       parent_names[8] = "audiopll_divpmcck";
-       parent_names[9] = "ethpll_divpmcck";
+       parent_names[3] = "syspll_divpmcck";
+       parent_names[4] = "ddrpll_divpmcck";
+       parent_names[5] = "imgpll_divpmcck";
+       parent_names[6] = "baudpll_divpmcck";
+       parent_names[7] = "audiopll_divpmcck";
+       parent_names[8] = "ethpll_divpmcck";
        for (i = 0; i < 8; i++) {
                char name[6];
 
                snprintf(name, sizeof(name), "prog%d", i);
 
                hw = at91_clk_register_programmable(regmap, name, parent_names,
-                                                   10, i,
+                                                   9, i,
                                                    &programmable_layout,
                                                    sama7g5_prog_mux_table);
                if (IS_ERR(hw))
                        goto err_free;
+
+               sama7g5_pmc->pchws[i] = hw;
        }
 
        for (i = 0; i < ARRAY_SIZE(sama7g5_systemck); i++) {
@@ -1010,9 +1086,8 @@ static void __init sama7g5_pmc_setup(struct device_node *np)
        parent_names[0] = md_slck_name;
        parent_names[1] = td_slck_name;
        parent_names[2] = "mainck";
-       parent_names[3] = "mck0";
        for (i = 0; i < ARRAY_SIZE(sama7g5_gck); i++) {
-               u8 num_parents = 4 + sama7g5_gck[i].pp_count;
+               u8 num_parents = 3 + sama7g5_gck[i].pp_count;
                u32 *mux_table;
 
                mux_table = kmalloc_array(num_parents, sizeof(*mux_table),
@@ -1020,10 +1095,10 @@ static void __init sama7g5_pmc_setup(struct device_node *np)
                if (!mux_table)
                        goto err_free;
 
-               SAMA7G5_INIT_TABLE(mux_table, 4);
-               SAMA7G5_FILL_TABLE(&mux_table[4], sama7g5_gck[i].pp_mux_table,
+               SAMA7G5_INIT_TABLE(mux_table, 3);
+               SAMA7G5_FILL_TABLE(&mux_table[3], sama7g5_gck[i].pp_mux_table,
                                   sama7g5_gck[i].pp_count);
-               SAMA7G5_FILL_TABLE(&parent_names[4], sama7g5_gck[i].pp,
+               SAMA7G5_FILL_TABLE(&parent_names[3], sama7g5_gck[i].pp,
                                   sama7g5_gck[i].pp_count);
 
                hw = at91_clk_register_generated(regmap, &pmc_pcr_lock,
@@ -1052,7 +1127,7 @@ err_free:
                kfree(alloc_mem);
        }
 
-       pmc_data_free(sama7g5_pmc);
+       kfree(sama7g5_pmc);
 }
 
 /* Some clks are used for a clocksource */
index 8333e20..e63a426 100644 (file)
@@ -25,7 +25,6 @@ static const struct clk_parent_data clk_dvp_parent = {
 static int clk_dvp_probe(struct platform_device *pdev)
 {
        struct clk_hw_onecell_data *data;
-       struct resource *res;
        struct clk_dvp *dvp;
        void __iomem *base;
        int ret;
@@ -42,7 +41,7 @@ static int clk_dvp_probe(struct platform_device *pdev)
                return -ENOMEM;
        data = dvp->data;
 
-       base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+       base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base))
                return PTR_ERR(base);
 
@@ -108,6 +107,7 @@ static const struct of_device_id clk_dvp_dt_ids[] = {
        { .compatible = "brcm,brcm2711-dvp", },
        { /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, clk_dvp_dt_ids);
 
 static struct platform_driver clk_dvp_driver = {
        .probe  = clk_dvp_probe,
index 14d803e..ad86e03 100644 (file)
 #define MMCM_CLK_DIV_DIVIDE    BIT(11)
 #define MMCM_CLK_DIV_NOCOUNT   BIT(12)
 
+struct axi_clkgen_limits {
+       unsigned int fpfd_min;
+       unsigned int fpfd_max;
+       unsigned int fvco_min;
+       unsigned int fvco_max;
+};
+
 struct axi_clkgen {
        void __iomem *base;
        struct clk_hw clk_hw;
+       struct axi_clkgen_limits limits;
 };
 
 static uint32_t axi_clkgen_lookup_filter(unsigned int m)
@@ -100,12 +108,15 @@ static uint32_t axi_clkgen_lookup_lock(unsigned int m)
        return 0x1f1f00fa;
 }
 
-static const unsigned int fpfd_min = 10000;
-static const unsigned int fpfd_max = 300000;
-static const unsigned int fvco_min = 600000;
-static const unsigned int fvco_max = 1200000;
+static const struct axi_clkgen_limits axi_clkgen_zynq_default_limits = {
+       .fpfd_min = 10000,
+       .fpfd_max = 300000,
+       .fvco_min = 600000,
+       .fvco_max = 1200000,
+};
 
-static void axi_clkgen_calc_params(unsigned long fin, unsigned long fout,
+static void axi_clkgen_calc_params(const struct axi_clkgen_limits *limits,
+       unsigned long fin, unsigned long fout,
        unsigned int *best_d, unsigned int *best_m, unsigned int *best_dout)
 {
        unsigned long d, d_min, d_max, _d_min, _d_max;
@@ -122,12 +133,12 @@ static void axi_clkgen_calc_params(unsigned long fin, unsigned long fout,
        *best_m = 0;
        *best_dout = 0;
 
-       d_min = max_t(unsigned long, DIV_ROUND_UP(fin, fpfd_max), 1);
-       d_max = min_t(unsigned long, fin / fpfd_min, 80);
+       d_min = max_t(unsigned long, DIV_ROUND_UP(fin, limits->fpfd_max), 1);
+       d_max = min_t(unsigned long, fin / limits->fpfd_min, 80);
 
 again:
-       fvco_min_fract = fvco_min << fract_shift;
-       fvco_max_fract = fvco_max << fract_shift;
+       fvco_min_fract = limits->fvco_min << fract_shift;
+       fvco_max_fract = limits->fvco_max << fract_shift;
 
        m_min = max_t(unsigned long, DIV_ROUND_UP(fvco_min_fract, fin) * d_min, 1);
        m_max = min_t(unsigned long, fvco_max_fract * d_max / fin, 64 << fract_shift);
@@ -319,6 +330,7 @@ static int axi_clkgen_set_rate(struct clk_hw *clk_hw,
        unsigned long rate, unsigned long parent_rate)
 {
        struct axi_clkgen *axi_clkgen = clk_hw_to_axi_clkgen(clk_hw);
+       const struct axi_clkgen_limits *limits = &axi_clkgen->limits;
        unsigned int d, m, dout;
        struct axi_clkgen_div_params params;
        uint32_t power = 0;
@@ -328,7 +340,7 @@ static int axi_clkgen_set_rate(struct clk_hw *clk_hw,
        if (parent_rate == 0 || rate == 0)
                return -EINVAL;
 
-       axi_clkgen_calc_params(parent_rate, rate, &d, &m, &dout);
+       axi_clkgen_calc_params(limits, parent_rate, rate, &d, &m, &dout);
 
        if (d == 0 || dout == 0 || m == 0)
                return -EINVAL;
@@ -368,10 +380,12 @@ static int axi_clkgen_set_rate(struct clk_hw *clk_hw,
 static long axi_clkgen_round_rate(struct clk_hw *hw, unsigned long rate,
        unsigned long *parent_rate)
 {
+       struct axi_clkgen *axi_clkgen = clk_hw_to_axi_clkgen(hw);
+       const struct axi_clkgen_limits *limits = &axi_clkgen->limits;
        unsigned int d, m, dout;
        unsigned long long tmp;
 
-       axi_clkgen_calc_params(*parent_rate, rate, &d, &m, &dout);
+       axi_clkgen_calc_params(limits, *parent_rate, rate, &d, &m, &dout);
 
        if (d == 0 || dout == 0 || m == 0)
                return -EINVAL;
@@ -482,17 +496,9 @@ static const struct clk_ops axi_clkgen_ops = {
        .get_parent = axi_clkgen_get_parent,
 };
 
-static const struct of_device_id axi_clkgen_ids[] = {
-       {
-               .compatible = "adi,axi-clkgen-2.00.a",
-       },
-       { },
-};
-MODULE_DEVICE_TABLE(of, axi_clkgen_ids);
-
 static int axi_clkgen_probe(struct platform_device *pdev)
 {
-       const struct of_device_id *id;
+       const struct axi_clkgen_limits *dflt_limits;
        struct axi_clkgen *axi_clkgen;
        struct clk_init_data init;
        const char *parent_names[2];
@@ -501,11 +507,8 @@ static int axi_clkgen_probe(struct platform_device *pdev)
        unsigned int i;
        int ret;
 
-       if (!pdev->dev.of_node)
-               return -ENODEV;
-
-       id = of_match_node(axi_clkgen_ids, pdev->dev.of_node);
-       if (!id)
+       dflt_limits = device_get_match_data(&pdev->dev);
+       if (!dflt_limits)
                return -ENODEV;
 
        axi_clkgen = devm_kzalloc(&pdev->dev, sizeof(*axi_clkgen), GFP_KERNEL);
@@ -527,6 +530,8 @@ static int axi_clkgen_probe(struct platform_device *pdev)
                        return -EINVAL;
        }
 
+       memcpy(&axi_clkgen->limits, dflt_limits, sizeof(axi_clkgen->limits));
+
        clk_name = pdev->dev.of_node->name;
        of_property_read_string(pdev->dev.of_node, "clock-output-names",
                &clk_name);
@@ -554,6 +559,15 @@ static int axi_clkgen_remove(struct platform_device *pdev)
        return 0;
 }
 
+static const struct of_device_id axi_clkgen_ids[] = {
+       {
+               .compatible = "adi,axi-clkgen-2.00.a",
+               .data = &axi_clkgen_zynq_default_limits,
+       },
+       { }
+};
+MODULE_DEVICE_TABLE(of, axi_clkgen_ids);
+
 static struct platform_driver axi_clkgen_driver = {
        .driver = {
                .name = "adi-axi-clkgen",
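
The probe path now derives its limits from the OF match data instead of open-coding of_match_node(). A minimal sketch of the pattern under the same assumptions (struct axi_clkgen_limits as defined above; foo_* names are placeholders):

#include <linux/mod_devicetable.h>
#include <linux/platform_device.h>
#include <linux/property.h>

static const struct axi_clkgen_limits foo_limits = {
	.fpfd_min = 10000,
	.fpfd_max = 300000,
	.fvco_min = 600000,
	.fvco_max = 1200000,
};

static const struct of_device_id foo_ids[] = {
	{ .compatible = "adi,axi-clkgen-2.00.a", .data = &foo_limits },
	{ }
};

static int foo_probe(struct platform_device *pdev)
{
	const struct axi_clkgen_limits *limits;

	/* Returns the .data of the matched of_device_id entry, or NULL. */
	limits = device_get_match_data(&pdev->dev);
	if (!limits)
		return -ENODEV;

	/* ... copy *limits into driver state, as the probe above does ... */
	return 0;
}
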
index 2ddb54f..0506046 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/clk-provider.h>
+#include <linux/device.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 
@@ -405,3 +406,52 @@ void clk_hw_unregister_composite(struct clk_hw *hw)
        kfree(composite);
 }
 EXPORT_SYMBOL_GPL(clk_hw_unregister_composite);
+
+static void devm_clk_hw_release_composite(struct device *dev, void *res)
+{
+       clk_hw_unregister_composite(*(struct clk_hw **)res);
+}
+
+static struct clk_hw *__devm_clk_hw_register_composite(struct device *dev,
+                       const char *name, const char * const *parent_names,
+                       const struct clk_parent_data *pdata, int num_parents,
+                       struct clk_hw *mux_hw, const struct clk_ops *mux_ops,
+                       struct clk_hw *rate_hw, const struct clk_ops *rate_ops,
+                       struct clk_hw *gate_hw, const struct clk_ops *gate_ops,
+                       unsigned long flags)
+{
+       struct clk_hw **ptr, *hw;
+
+       ptr = devres_alloc(devm_clk_hw_release_composite, sizeof(*ptr),
+                          GFP_KERNEL);
+       if (!ptr)
+               return ERR_PTR(-ENOMEM);
+
+       hw = __clk_hw_register_composite(dev, name, parent_names, pdata,
+                                        num_parents, mux_hw, mux_ops, rate_hw,
+                                        rate_ops, gate_hw, gate_ops, flags);
+
+       if (!IS_ERR(hw)) {
+               *ptr = hw;
+               devres_add(dev, ptr);
+       } else {
+               devres_free(ptr);
+       }
+
+       return hw;
+}
+
+struct clk_hw *devm_clk_hw_register_composite_pdata(struct device *dev,
+                       const char *name,
+                       const struct clk_parent_data *parent_data,
+                       int num_parents,
+                       struct clk_hw *mux_hw, const struct clk_ops *mux_ops,
+                       struct clk_hw *rate_hw, const struct clk_ops *rate_ops,
+                       struct clk_hw *gate_hw, const struct clk_ops *gate_ops,
+                       unsigned long flags)
+{
+       return __devm_clk_hw_register_composite(dev, name, NULL, parent_data,
+                                               num_parents, mux_hw, mux_ops,
+                                               rate_hw, rate_ops, gate_hw,
+                                               gate_ops, flags);
+}
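
Both devm wrappers added in this patch follow the stock devres recipe: allocate a release record, attempt the real registration, then either arm or discard the record. The skeleton, stripped to its moving parts (foo_* names stand in for any register/unregister pair):

static void devm_foo_release(struct device *dev, void *res)
{
	foo_unregister(*(struct foo **)res);
}

static struct foo *devm_foo_register(struct device *dev, const char *name)
{
	struct foo **ptr, *f;

	ptr = devres_alloc(devm_foo_release, sizeof(*ptr), GFP_KERNEL);
	if (!ptr)
		return ERR_PTR(-ENOMEM);

	f = foo_register(dev, name);
	if (!IS_ERR(f)) {
		*ptr = f;
		devres_add(dev, ptr);	/* release fires on driver detach */
	} else {
		devres_free(ptr);	/* registration failed; drop record */
	}

	return f;
}
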
index 8de12cb..c499799 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <linux/clk-provider.h>
+#include <linux/device.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/io.h>
@@ -578,3 +579,36 @@ void clk_hw_unregister_divider(struct clk_hw *hw)
        kfree(div);
 }
 EXPORT_SYMBOL_GPL(clk_hw_unregister_divider);
+
+static void devm_clk_hw_release_divider(struct device *dev, void *res)
+{
+       clk_hw_unregister_divider(*(struct clk_hw **)res);
+}
+
+struct clk_hw *__devm_clk_hw_register_divider(struct device *dev,
+               struct device_node *np, const char *name,
+               const char *parent_name, const struct clk_hw *parent_hw,
+               const struct clk_parent_data *parent_data, unsigned long flags,
+               void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags,
+               const struct clk_div_table *table, spinlock_t *lock)
+{
+       struct clk_hw **ptr, *hw;
+
+       ptr = devres_alloc(devm_clk_hw_release_divider, sizeof(*ptr), GFP_KERNEL);
+       if (!ptr)
+               return ERR_PTR(-ENOMEM);
+
+       hw = __clk_hw_register_divider(dev, np, name, parent_name, parent_hw,
+                                      parent_data, flags, reg, shift, width,
+                                      clk_divider_flags, table, lock);
+
+       if (!IS_ERR(hw)) {
+               *ptr = hw;
+               devres_add(dev, ptr);
+       } else {
+               devres_free(ptr);
+       }
+
+       return hw;
+}
+EXPORT_SYMBOL_GPL(__devm_clk_hw_register_divider);
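
Callers are expected to reach this through the static inline wrappers in <linux/clk-provider.h> (devm_clk_hw_register_divider() and friends). A hypothetical table-based call, matching the argument order used by the FlexSPI driver added below (FOO_DIV and foo_div_table are placeholders):

hw = devm_clk_hw_register_divider_table(dev, "foo_div", "foo_parent",
					0,		/* flags */
					base + FOO_DIV,	/* reg */
					0, 5,		/* shift, width */
					0,		/* divider flags */
					foo_div_table, NULL);
if (IS_ERR(hw))
	return PTR_ERR(hw);
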
diff --git a/drivers/clk/clk-fsl-flexspi.c b/drivers/clk/clk-fsl-flexspi.c
new file mode 100644 (file)
index 0000000..8432d68
--- /dev/null
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Layerscape FlexSPI clock driver
+ *
+ * Copyright 2020 Michael Walle <michael@walle.cc>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+static const struct clk_div_table ls1028a_flexspi_divs[] = {
+       { .val = 0, .div = 1, },
+       { .val = 1, .div = 2, },
+       { .val = 2, .div = 3, },
+       { .val = 3, .div = 4, },
+       { .val = 4, .div = 5, },
+       { .val = 5, .div = 6, },
+       { .val = 6, .div = 7, },
+       { .val = 7, .div = 8, },
+       { .val = 11, .div = 12, },
+       { .val = 15, .div = 16, },
+       { .val = 16, .div = 20, },
+       { .val = 17, .div = 24, },
+       { .val = 18, .div = 28, },
+       { .val = 19, .div = 32, },
+       { .val = 20, .div = 80, },
+       {}
+};
+
+static const struct clk_div_table lx2160a_flexspi_divs[] = {
+       { .val = 1, .div = 2, },
+       { .val = 3, .div = 4, },
+       { .val = 5, .div = 6, },
+       { .val = 7, .div = 8, },
+       { .val = 11, .div = 12, },
+       { .val = 15, .div = 16, },
+       { .val = 16, .div = 20, },
+       { .val = 17, .div = 24, },
+       { .val = 18, .div = 28, },
+       { .val = 19, .div = 32, },
+       { .val = 20, .div = 80, },
+       {}
+};
+
+static int fsl_flexspi_clk_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct device_node *np = dev->of_node;
+       const char *clk_name = np->name;
+       const char *clk_parent;
+       struct resource *res;
+       void __iomem *reg;
+       struct clk_hw *hw;
+       const struct clk_div_table *divs;
+
+       divs = device_get_match_data(dev);
+       if (!divs)
+               return -ENOENT;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENOENT;
+
+       /*
+        * Can't use devm_ioremap_resource() or devm_of_iomap() because the
+        * resource might already be taken by the parent device.
+        */
+       reg = devm_ioremap(dev, res->start, resource_size(res));
+       if (!reg)
+               return -ENOMEM;
+
+       clk_parent = of_clk_get_parent_name(np, 0);
+       if (!clk_parent)
+               return -EINVAL;
+
+       of_property_read_string(np, "clock-output-names", &clk_name);
+
+       hw = devm_clk_hw_register_divider_table(dev, clk_name, clk_parent, 0,
+                                               reg, 0, 5, 0, divs, NULL);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+
+       return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw);
+}
+
+static const struct of_device_id fsl_flexspi_clk_dt_ids[] = {
+       { .compatible = "fsl,ls1028a-flexspi-clk", .data = &ls1028a_flexspi_divs },
+       { .compatible = "fsl,lx2160a-flexspi-clk", .data = &lx2160a_flexspi_divs },
+       {}
+};
+MODULE_DEVICE_TABLE(of, fsl_flexspi_clk_dt_ids);
+
+static struct platform_driver fsl_flexspi_clk_driver = {
+       .driver = {
+               .name = "fsl-flexspi-clk",
+               .of_match_table = fsl_flexspi_clk_dt_ids,
+       },
+       .probe = fsl_flexspi_clk_probe,
+};
+module_platform_driver(fsl_flexspi_clk_driver);
+
+MODULE_DESCRIPTION("FlexSPI clock driver for Layerscape SoCs");
+MODULE_AUTHOR("Michael Walle <michael@walle.cc>");
+MODULE_LICENSE("GPL");
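
For reference, a clk_div_table entry maps the raw register field value (.val) to a divider (.div), and the table is terminated by a zero entry; with the ls1028a table above, field value 16 selects divide-by-20, so a 400 MHz parent yields 20 MHz. A small sketch of the lookup (hypothetical helper, not part of the driver; struct clk_div_table is from <linux/clk-provider.h>):

static unsigned long foo_divtbl_to_rate(const struct clk_div_table *t,
					unsigned long parent_rate, u32 val)
{
	for (; t->div; t++)	/* table is zero-terminated */
		if (t->val == val)
			return parent_rate / t->div;

	return 0;	/* register value not mapped by this table */
}
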
index 0221180..6238fce 100644 (file)
@@ -58,13 +58,13 @@ static int fsl_sai_clk_probe(struct platform_device *pdev)
        /* set clock direction, we are the BCLK master */
        writel(CR2_BCD, base + I2S_CR2);
 
-       hw = clk_hw_register_composite_pdata(dev, dev->of_node->name,
-                                            &pdata, 1, NULL, NULL,
-                                            &sai_clk->div.hw,
-                                            &clk_divider_ops,
-                                            &sai_clk->gate.hw,
-                                            &clk_gate_ops,
-                                            CLK_SET_RATE_GATE);
+       hw = devm_clk_hw_register_composite_pdata(dev, dev->of_node->name,
+                                                 &pdata, 1, NULL, NULL,
+                                                 &sai_clk->div.hw,
+                                                 &clk_divider_ops,
+                                                 &sai_clk->gate.hw,
+                                                 &clk_gate_ops,
+                                                 CLK_SET_RATE_GATE);
        if (IS_ERR(hw))
                return PTR_ERR(hw);
 
index 86f2e2d..da2c8ed 100644 (file)
@@ -147,7 +147,7 @@ static struct platform_driver clk_pwm_driver = {
        .remove = clk_pwm_remove,
        .driver = {
                .name = "pwm-clock",
-               .of_match_table = of_match_ptr(clk_pwm_dt_ids),
+               .of_match_table = clk_pwm_dt_ids,
        },
 };
 
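
The of_match_ptr() removal here is more than cosmetic; the usual reason for this cleanup:

/*
 * With CONFIG_OF=n, of_match_ptr() expands to NULL and the match table
 * becomes a defined-but-unreferenced const variable, tripping
 * -Wunused-const-variable under compile testing. Referencing the table
 * unconditionally (or tagging it __maybe_unused, as the scpi hunk
 * below does) avoids the warning at negligible cost.
 */
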
index 46101c6..70aa521 100644 (file)
@@ -7,6 +7,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <dt-bindings/clock/fsl,qoriq-clockgen.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
@@ -1368,33 +1369,33 @@ static struct clk *clockgen_clk_get(struct of_phandle_args *clkspec, void *data)
        idx = clkspec->args[1];
 
        switch (type) {
-       case 0:
+       case QORIQ_CLK_SYSCLK:
                if (idx != 0)
                        goto bad_args;
                clk = cg->sysclk;
                break;
-       case 1:
+       case QORIQ_CLK_CMUX:
                if (idx >= ARRAY_SIZE(cg->cmux))
                        goto bad_args;
                clk = cg->cmux[idx];
                break;
-       case 2:
+       case QORIQ_CLK_HWACCEL:
                if (idx >= ARRAY_SIZE(cg->hwaccel))
                        goto bad_args;
                clk = cg->hwaccel[idx];
                break;
-       case 3:
+       case QORIQ_CLK_FMAN:
                if (idx >= ARRAY_SIZE(cg->fman))
                        goto bad_args;
                clk = cg->fman[idx];
                break;
-       case 4:
+       case QORIQ_CLK_PLATFORM_PLL:
                pll = &cg->pll[PLATFORM_PLL];
                if (idx >= ARRAY_SIZE(pll->div))
                        goto bad_args;
                clk = pll->div[idx].clk;
                break;
-       case 5:
+       case QORIQ_CLK_CORECLK:
                if (idx != 0)
                        goto bad_args;
                clk = cg->coreclk;
index aa21371..a3e883a 100644 (file)
@@ -195,6 +195,7 @@ static int s2mps11_clk_probe(struct platform_device *pdev)
        return ret;
 
 err_reg:
+       of_node_put(s2mps11_clks[0].clk_np);
        while (--i >= 0)
                clkdev_drop(s2mps11_clks[i].lookup);
 
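
The added of_node_put() evidently plugs a reference leak: the node cached in s2mps11_clks[0].clk_np holds a reference that was never dropped on the error path. The general discipline, as a sketch (foo_* names hypothetical):

static int foo_parse(struct device *dev)
{
	struct device_node *np;
	int ret;

	np = of_get_child_by_name(dev->of_node, "clocks");	/* takes a ref */
	if (!np)
		return -ENODEV;

	ret = foo_register_clks(np);	/* hypothetical consumer */

	of_node_put(np);	/* drop the ref on success and failure alike */
	return ret;
}
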
index 5a9b140..a39af76 100644 (file)
@@ -129,7 +129,7 @@ static const struct clk_ops scpi_dvfs_ops = {
        .set_rate = scpi_dvfs_set_rate,
 };
 
-static const struct of_device_id scpi_clk_match[] = {
+static const struct of_device_id scpi_clk_match[] __maybe_unused = {
        { .compatible = "arm,scpi-dvfs-clocks", .data = &scpi_dvfs_ops, },
        { .compatible = "arm,scpi-variable-clocks", .data = &scpi_clk_ops, },
        {}
index 1e1702e..57e4597 100644 (file)
@@ -902,6 +902,10 @@ static int _si5351_clkout_set_disable_state(
 static void _si5351_clkout_reset_pll(struct si5351_driver_data *drvdata, int num)
 {
        u8 val = si5351_reg_read(drvdata, SI5351_CLK0_CTRL + num);
+       u8 mask = val & SI5351_CLK_PLL_SELECT ? SI5351_PLL_RESET_B :
+                                                      SI5351_PLL_RESET_A;
+       unsigned int v;
+       int err;
 
        switch (val & SI5351_CLK_INPUT_MASK) {
        case SI5351_CLK_INPUT_XTAL:
@@ -909,9 +913,12 @@ static void _si5351_clkout_reset_pll(struct si5351_driver_data *drvdata, int num
                return;  /* pll not used, no need to reset */
        }
 
-       si5351_reg_write(drvdata, SI5351_PLL_RESET,
-                        val & SI5351_CLK_PLL_SELECT ? SI5351_PLL_RESET_B :
-                                                      SI5351_PLL_RESET_A);
+       si5351_reg_write(drvdata, SI5351_PLL_RESET, mask);
+
+       err = regmap_read_poll_timeout(drvdata->regmap, SI5351_PLL_RESET, v,
+                                !(v & mask), 0, 20000);
+       if (err < 0)
+               dev_err(&drvdata->client->dev, "Reset bit didn't clear\n");
 
        dev_dbg(&drvdata->client->dev, "%s - %s: pll = %d\n",
                __func__, clk_hw_get_name(&drvdata->clkout[num].hw),
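
For clarity, the regmap_read_poll_timeout() arguments above, labeled; per <linux/regmap.h> it returns 0 once the condition holds, -ETIMEDOUT if it never does (or the underlying read's error code):

err = regmap_read_poll_timeout(drvdata->regmap,
			       SI5351_PLL_RESET,	/* register to re-read */
			       v,			/* last value read */
			       !(v & mask),		/* exit condition */
			       0,			/* no sleep between reads */
			       20000);			/* timeout, in us */
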
index c90460e..43db673 100644 (file)
@@ -739,8 +739,8 @@ static int vc5_update_power(struct device_node *np_output,
 {
        u32 value;
 
-       if (!of_property_read_u32(np_output,
-                                 "idt,voltage-microvolts", &value)) {
+       if (!of_property_read_u32(np_output, "idt,voltage-microvolt",
+                                 &value)) {
                clk_out->clk_output_cfg0_mask |= VC5_CLK_OUTPUT_CFG0_PWR_MASK;
                switch (value) {
                case 1800000:
index f83dac5..8c1d04d 100644 (file)
@@ -420,7 +420,7 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index)
 static void clk_core_fill_parent_index(struct clk_core *core, u8 index)
 {
        struct clk_parent_map *entry = &core->parents[index];
-       struct clk_core *parent = ERR_PTR(-ENOENT);
+       struct clk_core *parent;
 
        if (entry->hw) {
                parent = entry->hw->core;
@@ -2314,6 +2314,8 @@ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max)
        if (!clk)
                return 0;
 
+       trace_clk_set_rate_range(clk->core, min, max);
+
        if (min > max) {
                pr_err("%s: clk %s dev %s con %s: invalid range [%lu, %lu]\n",
                       __func__, clk->core->name, clk->dev_id, clk->con_id,
@@ -2381,6 +2383,8 @@ int clk_set_min_rate(struct clk *clk, unsigned long rate)
        if (!clk)
                return 0;
 
+       trace_clk_set_min_rate(clk->core, rate);
+
        return clk_set_rate_range(clk, rate, clk->max_rate);
 }
 EXPORT_SYMBOL_GPL(clk_set_min_rate);
@@ -2397,6 +2401,8 @@ int clk_set_max_rate(struct clk *clk, unsigned long rate)
        if (!clk)
                return 0;
 
+       trace_clk_set_max_rate(clk->core, rate);
+
        return clk_set_rate_range(clk, clk->min_rate, rate);
 }
 EXPORT_SYMBOL_GPL(clk_set_max_rate);
@@ -2931,7 +2937,14 @@ static void clk_summary_show_one(struct seq_file *s, struct clk_core *c,
        else
                seq_puts(s, "-----");
 
-       seq_printf(s, " %6d\n", clk_core_get_scaled_duty_cycle(c, 100000));
+       seq_printf(s, " %6d", clk_core_get_scaled_duty_cycle(c, 100000));
+
+       if (c->ops->is_enabled)
+               seq_printf(s, " %9c\n", clk_core_is_enabled(c) ? 'Y' : 'N');
+       else if (!c->ops->enable)
+               seq_printf(s, " %9c\n", 'Y');
+       else
+               seq_printf(s, " %9c\n", '?');
 }
 
 static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c,
@@ -2950,9 +2963,9 @@ static int clk_summary_show(struct seq_file *s, void *data)
        struct clk_core *c;
        struct hlist_head **lists = (struct hlist_head **)s->private;
 
-       seq_puts(s, "                                 enable  prepare  protect                                duty\n");
-       seq_puts(s, "   clock                          count    count    count        rate   accuracy phase  cycle\n");
-       seq_puts(s, "---------------------------------------------------------------------------------------------\n");
+       seq_puts(s, "                                 enable  prepare  protect                                duty  hardware\n");
+       seq_puts(s, "   clock                          count    count    count        rate   accuracy phase  cycle    enable\n");
+       seq_puts(s, "-------------------------------------------------------------------------------------------------------\n");
 
        clk_prepare_lock();
 
@@ -3667,6 +3680,24 @@ struct clk *clk_hw_create_clk(struct device *dev, struct clk_hw *hw,
        return clk;
 }
 
+/**
+ * clk_hw_get_clk - get clk consumer given a clk_hw
+ * @hw: clk_hw associated with the clk being consumed
+ * @con_id: connection ID string on device
+ *
+ * Returns: new clk consumer
+ * This is the function to be used by providers that need
+ * to get a consumer clk and act on the clock element.
+ * Calls to this function must be balanced with calls to clk_put().
+ */
+struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id)
+{
+       struct device *dev = hw->core->dev;
+
+       return clk_hw_create_clk(dev, hw, dev_name(dev), con_id);
+}
+EXPORT_SYMBOL(clk_hw_get_clk);
+
 static int clk_cpy_name(const char **dst_p, const char *src, bool must_exist)
 {
        const char *dst;
@@ -4068,12 +4099,12 @@ void clk_hw_unregister(struct clk_hw *hw)
 }
 EXPORT_SYMBOL_GPL(clk_hw_unregister);
 
-static void devm_clk_release(struct device *dev, void *res)
+static void devm_clk_unregister_cb(struct device *dev, void *res)
 {
        clk_unregister(*(struct clk **)res);
 }
 
-static void devm_clk_hw_release(struct device *dev, void *res)
+static void devm_clk_hw_unregister_cb(struct device *dev, void *res)
 {
        clk_hw_unregister(*(struct clk_hw **)res);
 }
@@ -4093,7 +4124,7 @@ struct clk *devm_clk_register(struct device *dev, struct clk_hw *hw)
        struct clk *clk;
        struct clk **clkp;
 
-       clkp = devres_alloc(devm_clk_release, sizeof(*clkp), GFP_KERNEL);
+       clkp = devres_alloc(devm_clk_unregister_cb, sizeof(*clkp), GFP_KERNEL);
        if (!clkp)
                return ERR_PTR(-ENOMEM);
 
@@ -4123,7 +4154,7 @@ int devm_clk_hw_register(struct device *dev, struct clk_hw *hw)
        struct clk_hw **hwp;
        int ret;
 
-       hwp = devres_alloc(devm_clk_hw_release, sizeof(*hwp), GFP_KERNEL);
+       hwp = devres_alloc(devm_clk_hw_unregister_cb, sizeof(*hwp), GFP_KERNEL);
        if (!hwp)
                return -ENOMEM;
 
@@ -4167,7 +4198,7 @@ static int devm_clk_hw_match(struct device *dev, void *res, void *data)
  */
 void devm_clk_unregister(struct device *dev, struct clk *clk)
 {
-       WARN_ON(devres_release(dev, devm_clk_release, devm_clk_match, clk));
+       WARN_ON(devres_release(dev, devm_clk_unregister_cb, devm_clk_match, clk));
 }
 EXPORT_SYMBOL_GPL(devm_clk_unregister);
 
@@ -4182,11 +4213,54 @@ EXPORT_SYMBOL_GPL(devm_clk_unregister);
  */
 void devm_clk_hw_unregister(struct device *dev, struct clk_hw *hw)
 {
-       WARN_ON(devres_release(dev, devm_clk_hw_release, devm_clk_hw_match,
+       WARN_ON(devres_release(dev, devm_clk_hw_unregister_cb, devm_clk_hw_match,
                                hw));
 }
 EXPORT_SYMBOL_GPL(devm_clk_hw_unregister);
 
+static void devm_clk_release(struct device *dev, void *res)
+{
+       clk_put(*(struct clk **)res);
+}
+
+/**
+ * devm_clk_hw_get_clk - resource managed clk_hw_get_clk()
+ * @dev: device that is registering this clock
+ * @hw: clk_hw associated with the clk being consumed
+ * @con_id: connection ID string on device
+ *
+ * Managed clk_hw_get_clk(). Clocks got with this function are
+ * automatically clk_put() on driver detach. See clk_put()
+ * for more information.
+ */
+struct clk *devm_clk_hw_get_clk(struct device *dev, struct clk_hw *hw,
+                               const char *con_id)
+{
+       struct clk *clk;
+       struct clk **clkp;
+
+       /*
+        * This should not happen because it would mean we have drivers
+        * passing around clk_hw pointers instead of having the caller use
+        * proper clk_get() style APIs.
+        */
+       WARN_ON_ONCE(dev != hw->core->dev);
+
+       clkp = devres_alloc(devm_clk_release, sizeof(*clkp), GFP_KERNEL);
+       if (!clkp)
+               return ERR_PTR(-ENOMEM);
+
+       clk = clk_hw_get_clk(hw, con_id);
+       if (!IS_ERR(clk)) {
+               *clkp = clk;
+               devres_add(dev, clkp);
+       } else {
+               devres_free(clkp);
+       }
+
+       return clk;
+}
+EXPORT_SYMBOL_GPL(devm_clk_hw_get_clk);
+
 /*
  * clkdev helpers
  */
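
A matching device-managed sketch (again hypothetical; foo_register_hw() stands in for whatever registered the clk_hw on this device): the handle is clk_put() automatically on driver detach:

        static int foo_probe(struct platform_device *pdev)
        {
                struct clk_hw *hw;
                struct clk *clk;

                hw = foo_register_hw(&pdev->dev);       /* hypothetical helper */
                if (IS_ERR(hw))
                        return PTR_ERR(hw);

                clk = devm_clk_hw_get_clk(&pdev->dev, hw, "ref");
                if (IS_ERR(clk))
                        return PTR_ERR(clk);

                return clk_prepare_enable(clk);         /* no explicit clk_put() */
        }
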
@@ -4334,6 +4408,42 @@ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(clk_notifier_unregister);
 
+struct clk_notifier_devres {
+       struct clk *clk;
+       struct notifier_block *nb;
+};
+
+static void devm_clk_notifier_release(struct device *dev, void *res)
+{
+       struct clk_notifier_devres *devres = res;
+
+       clk_notifier_unregister(devres->clk, devres->nb);
+}
+
+int devm_clk_notifier_register(struct device *dev, struct clk *clk,
+                              struct notifier_block *nb)
+{
+       struct clk_notifier_devres *devres;
+       int ret;
+
+       devres = devres_alloc(devm_clk_notifier_release,
+                             sizeof(*devres), GFP_KERNEL);
+
+       if (!devres)
+               return -ENOMEM;
+
+       ret = clk_notifier_register(clk, nb);
+       if (!ret) {
+               devres->clk = clk;
+               devres->nb = nb;
+       } else {
+               devres_free(devres);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(devm_clk_notifier_register);
+
 #ifdef CONFIG_OF
 static void clk_core_reparent_orphans(void)
 {
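
Similarly, a hedged sketch of the new managed notifier API (hypothetical names; the events and struct clk_notifier_data come from <linux/clk.h>):

        static int foo_clk_notify(struct notifier_block *nb,
                                  unsigned long event, void *data)
        {
                struct clk_notifier_data *ndata = data;

                if (event == POST_RATE_CHANGE)
                        pr_debug("rate changed: %lu -> %lu\n",
                                 ndata->old_rate, ndata->new_rate);

                return NOTIFY_OK;
        }

        static struct notifier_block foo_clk_nb = {
                .notifier_call = foo_clk_notify,
        };

        /* in probe; unregistered automatically on driver detach */
        ret = devm_clk_notifier_register(dev, clk, &foo_clk_nb);
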
index 7eed708..f16c401 100644 (file)
@@ -30,6 +30,7 @@ struct clk_gate2 {
        void __iomem    *reg;
        u8              bit_idx;
        u8              cgr_val;
+       u8              cgr_mask;
        u8              flags;
        spinlock_t      *lock;
        unsigned int    *share_count;
@@ -37,37 +38,38 @@ struct clk_gate2 {
 
 #define to_clk_gate2(_hw) container_of(_hw, struct clk_gate2, hw)
 
-static int clk_gate2_enable(struct clk_hw *hw)
+static void clk_gate2_do_shared_clks(struct clk_hw *hw, bool enable)
 {
        struct clk_gate2 *gate = to_clk_gate2(hw);
        u32 reg;
+
+       reg = readl(gate->reg);
+       reg &= ~(gate->cgr_mask << gate->bit_idx);
+       if (enable)
+               reg |= (gate->cgr_val & gate->cgr_mask) << gate->bit_idx;
+       writel(reg, gate->reg);
+}
+
+static int clk_gate2_enable(struct clk_hw *hw)
+{
+       struct clk_gate2 *gate = to_clk_gate2(hw);
        unsigned long flags;
-       int ret = 0;
 
        spin_lock_irqsave(gate->lock, flags);
 
        if (gate->share_count && (*gate->share_count)++ > 0)
                goto out;
 
-       if (gate->flags & IMX_CLK_GATE2_SINGLE_BIT) {
-               ret = clk_gate_ops.enable(hw);
-       } else {
-               reg = readl(gate->reg);
-               reg &= ~(3 << gate->bit_idx);
-               reg |= gate->cgr_val << gate->bit_idx;
-               writel(reg, gate->reg);
-       }
-
+       clk_gate2_do_shared_clks(hw, true);
 out:
        spin_unlock_irqrestore(gate->lock, flags);
 
-       return ret;
+       return 0;
 }
 
 static void clk_gate2_disable(struct clk_hw *hw)
 {
        struct clk_gate2 *gate = to_clk_gate2(hw);
-       u32 reg;
        unsigned long flags;
 
        spin_lock_irqsave(gate->lock, flags);
@@ -79,23 +81,17 @@ static void clk_gate2_disable(struct clk_hw *hw)
                        goto out;
        }
 
-       if (gate->flags & IMX_CLK_GATE2_SINGLE_BIT) {
-               clk_gate_ops.disable(hw);
-       } else {
-               reg = readl(gate->reg);
-               reg &= ~(3 << gate->bit_idx);
-               writel(reg, gate->reg);
-       }
-
+       clk_gate2_do_shared_clks(hw, false);
 out:
        spin_unlock_irqrestore(gate->lock, flags);
 }
 
-static int clk_gate2_reg_is_enabled(void __iomem *reg, u8 bit_idx)
+static int clk_gate2_reg_is_enabled(void __iomem *reg, u8 bit_idx,
+                                       u8 cgr_val, u8 cgr_mask)
 {
        u32 val = readl(reg);
 
-       if (((val >> bit_idx) & 1) == 1)
+       if (((val >> bit_idx) & cgr_mask) == cgr_val)
                return 1;
 
        return 0;
@@ -104,29 +100,28 @@ static int clk_gate2_reg_is_enabled(void __iomem *reg, u8 bit_idx)
 static int clk_gate2_is_enabled(struct clk_hw *hw)
 {
        struct clk_gate2 *gate = to_clk_gate2(hw);
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(gate->lock, flags);
 
-       if (gate->flags & IMX_CLK_GATE2_SINGLE_BIT)
-               return clk_gate_ops.is_enabled(hw);
+       ret = clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx,
+                                       gate->cgr_val, gate->cgr_mask);
 
-       return clk_gate2_reg_is_enabled(gate->reg, gate->bit_idx);
+       spin_unlock_irqrestore(gate->lock, flags);
+
+       return ret;
 }
 
 static void clk_gate2_disable_unused(struct clk_hw *hw)
 {
        struct clk_gate2 *gate = to_clk_gate2(hw);
        unsigned long flags;
-       u32 reg;
-
-       if (gate->flags & IMX_CLK_GATE2_SINGLE_BIT)
-               return;
 
        spin_lock_irqsave(gate->lock, flags);
 
-       if (!gate->share_count || *gate->share_count == 0) {
-               reg = readl(gate->reg);
-               reg &= ~(3 << gate->bit_idx);
-               writel(reg, gate->reg);
-       }
+       if (!gate->share_count || *gate->share_count == 0)
+               clk_gate2_do_shared_clks(hw, false);
 
        spin_unlock_irqrestore(gate->lock, flags);
 }
@@ -140,7 +135,7 @@ static const struct clk_ops clk_gate2_ops = {
 
 struct clk_hw *clk_hw_register_gate2(struct device *dev, const char *name,
                const char *parent_name, unsigned long flags,
-               void __iomem *reg, u8 bit_idx, u8 cgr_val,
+               void __iomem *reg, u8 bit_idx, u8 cgr_val, u8 cgr_mask,
                u8 clk_gate2_flags, spinlock_t *lock,
                unsigned int *share_count)
 {
@@ -157,6 +152,7 @@ struct clk_hw *clk_hw_register_gate2(struct device *dev, const char *name,
        gate->reg = reg;
        gate->bit_idx = bit_idx;
        gate->cgr_val = cgr_val;
+       gate->cgr_mask = cgr_mask;
        gate->flags = clk_gate2_flags;
        gate->lock = lock;
        gate->share_count = share_count;
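
With IMX_CLK_GATE2_SINGLE_BIT gone, the field width is carried by cgr_mask: classic 2-bit CGR gates now pass 0x3/0x3 and the former single-bit gates pass 0x1/0x1. A standalone sketch of the register arithmetic the new clk_gate2_do_shared_clks() helper performs:

        /* e.g. bit_idx = 4, cgr_val = 0x3, cgr_mask = 0x3:
         * enable:  reg = (reg & ~(0x3 << 4)) | (0x3 << 4)
         * disable: reg =  reg & ~(0x3 << 4)
         */
        static u32 gate2_field_update(u32 reg, u8 bit_idx, u8 cgr_val,
                                      u8 cgr_mask, bool enable)
        {
                reg &= ~((u32)cgr_mask << bit_idx);
                if (enable)
                        reg |= (u32)(cgr_val & cgr_mask) << bit_idx;
                return reg;
        }
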
index f358ad9..7c90586 100644 (file)
@@ -653,7 +653,7 @@ static struct platform_driver imx8mm_clk_driver = {
                 * reloading the driver will crash or break devices.
                 */
                .suppress_bind_attrs = true,
-               .of_match_table = of_match_ptr(imx8mm_clk_of_match),
+               .of_match_table = imx8mm_clk_of_match,
        },
 };
 module_platform_driver(imx8mm_clk_driver);
index f3c5e6c..3c21db9 100644 (file)
@@ -604,7 +604,7 @@ static struct platform_driver imx8mn_clk_driver = {
                 * reloading the driver will crash or break devices.
                 */
                .suppress_bind_attrs = true,
-               .of_match_table = of_match_ptr(imx8mn_clk_of_match),
+               .of_match_table = imx8mn_clk_of_match,
        },
 };
 module_platform_driver(imx8mn_clk_driver);
index 48e2124..2f4e1d6 100644 (file)
@@ -425,7 +425,7 @@ static struct clk **uart_clks[ARRAY_SIZE(uart_clk_ids) + 1];
 static int imx8mp_clocks_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
-       struct device_node *np = dev->of_node;
+       struct device_node *np;
        void __iomem *anatop_base, *ccm_base;
        int i;
 
@@ -763,7 +763,7 @@ static struct platform_driver imx8mp_clk_driver = {
                 * reloading the driver will crash or break devices.
                 */
                .suppress_bind_attrs = true,
-               .of_match_table = of_match_ptr(imx8mp_clk_of_match),
+               .of_match_table = imx8mp_clk_of_match,
        },
 };
 module_platform_driver(imx8mp_clk_driver);
index 06292d4..779ea69 100644 (file)
@@ -639,7 +639,7 @@ static struct platform_driver imx8mq_clk_driver = {
                 * reloading the driver will crash or break devices.
                 */
                .suppress_bind_attrs = true,
-               .of_match_table = of_match_ptr(imx8mq_clk_of_match),
+               .of_match_table = imx8mq_clk_of_match,
        },
 };
 module_platform_driver(imx8mq_clk_driver);
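
Dropping of_match_ptr() in these drivers is deliberate: they are now modular and always need the match table for OF module autoloading, while the wrapper would compile it away to NULL on !CONFIG_OF builds (leaving the table defined but unused). Simplified, the macro from include/linux/of.h behaves like:

        #ifdef CONFIG_OF
        #define of_match_ptr(_ptr)      (_ptr)
        #else
        #define of_match_ptr(_ptr)      NULL
        #endif
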
index e947a70..d3e905c 100644 (file)
@@ -9,8 +9,10 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 
 #include "clk-scu.h"
@@ -157,6 +159,135 @@ static const struct imx8qxp_ss_lpcg imx8qxp_ss_lsio = {
        .num_max = IMX_LSIO_LPCG_CLK_END,
 };
 
+#define IMX_LPCG_MAX_CLKS      8
+
+static struct clk_hw *imx_lpcg_of_clk_src_get(struct of_phandle_args *clkspec,
+                                             void *data)
+{
+       struct clk_hw_onecell_data *hw_data = data;
+       unsigned int idx = clkspec->args[0] / 4;
+
+       if (idx >= hw_data->num) {
+               pr_err("%s: invalid index %u\n", __func__, idx);
+               return ERR_PTR(-EINVAL);
+       }
+
+       return hw_data->hws[idx];
+}
+
+static int imx_lpcg_parse_clks_from_dt(struct platform_device *pdev,
+                                      struct device_node *np)
+{
+       const char *output_names[IMX_LPCG_MAX_CLKS];
+       const char *parent_names[IMX_LPCG_MAX_CLKS];
+       unsigned int bit_offset[IMX_LPCG_MAX_CLKS];
+       struct clk_hw_onecell_data *clk_data;
+       struct clk_hw **clk_hws;
+       struct resource *res;
+       void __iomem *base;
+       int count;
+       int idx;
+       int ret;
+       int i;
+
+       if (!of_device_is_compatible(np, "fsl,imx8qxp-lpcg"))
+               return -EINVAL;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
+
+       count = of_property_count_u32_elems(np, "clock-indices");
+       if (count < 0) {
+               dev_err(&pdev->dev, "failed to count clocks\n");
+               return -EINVAL;
+       }
+
+       /*
+        * The trick here is that we set the number of clks to the MAX
+        * instead of the count from clock-indices, because one LPCG
+        * supports up to 8 clock outputs, each of which owns a fixed
+        * 4-bit field. Any clock can then be found by its clock-indices
+        * entry (bit offset) / 4, at the cost of only a few unused
+        * pointers.
+        */
+
+       clk_data = devm_kzalloc(&pdev->dev, struct_size(clk_data, hws,
+                               IMX_LPCG_MAX_CLKS), GFP_KERNEL);
+       if (!clk_data)
+               return -ENOMEM;
+
+       clk_data->num = IMX_LPCG_MAX_CLKS;
+       clk_hws = clk_data->hws;
+
+       ret = of_property_read_u32_array(np, "clock-indices", bit_offset,
+                                        count);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "failed to read clock-indices\n");
+               return -EINVAL;
+       }
+
+       ret = of_clk_parent_fill(np, parent_names, count);
+       if (ret != count) {
+               dev_err(&pdev->dev, "failed to get clock parent names\n");
+               return -EINVAL;
+       }
+
+       ret = of_property_read_string_array(np, "clock-output-names",
+                                           output_names, count);
+       if (ret != count) {
+               dev_err(&pdev->dev, "failed to read clock-output-names\n");
+               return -EINVAL;
+       }
+
+       pm_runtime_get_noresume(&pdev->dev);
+       pm_runtime_set_active(&pdev->dev);
+       pm_runtime_set_autosuspend_delay(&pdev->dev, 500);
+       pm_runtime_use_autosuspend(&pdev->dev);
+       pm_runtime_enable(&pdev->dev);
+
+       for (i = 0; i < count; i++) {
+               idx = bit_offset[i] / 4;
+               if (idx >= IMX_LPCG_MAX_CLKS) {
+                       dev_warn(&pdev->dev, "invalid bit offset of clock %d\n",
+                                i);
+                       ret = -EINVAL;
+                       goto unreg;
+               }
+
+               clk_hws[idx] = imx_clk_lpcg_scu_dev(&pdev->dev, output_names[i],
+                                                   parent_names[i], 0, base,
+                                                   bit_offset[i], false);
+               if (IS_ERR(clk_hws[idx])) {
+                       dev_warn(&pdev->dev, "failed to register clock %d\n",
+                                idx);
+                       ret = PTR_ERR(clk_hws[idx]);
+                       goto unreg;
+               }
+       }
+
+       ret = devm_of_clk_add_hw_provider(&pdev->dev, imx_lpcg_of_clk_src_get,
+                                         clk_data);
+       if (ret)
+               goto unreg;
+
+       pm_runtime_mark_last_busy(&pdev->dev);
+       pm_runtime_put_autosuspend(&pdev->dev);
+
+       return 0;
+
+unreg:
+       while (--i >= 0) {
+               idx = bit_offset[i] / 4;
+               if (clk_hws[idx])
+                       imx_clk_lpcg_scu_unregister(clk_hws[idx]);
+       }
+
+       pm_runtime_disable(&pdev->dev);
+
+       return ret;
+}
+
 static int imx8qxp_lpcg_clk_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -167,8 +298,14 @@ static int imx8qxp_lpcg_clk_probe(struct platform_device *pdev)
        struct resource *res;
        struct clk_hw **clks;
        void __iomem *base;
+       int ret;
        int i;
 
+       /* try new binding to parse clocks from device tree first */
+       ret = imx_lpcg_parse_clks_from_dt(pdev, np);
+       if (!ret)
+               return 0;
+
        ss_lpcg = of_device_get_match_data(dev);
        if (!ss_lpcg)
                return -ENODEV;
@@ -219,6 +356,7 @@ static const struct of_device_id imx8qxp_lpcg_match[] = {
        { .compatible = "fsl,imx8qxp-lpcg-adma", &imx8qxp_ss_adma, },
        { .compatible = "fsl,imx8qxp-lpcg-conn", &imx8qxp_ss_conn, },
        { .compatible = "fsl,imx8qxp-lpcg-lsio", &imx8qxp_ss_lsio, },
+       { .compatible = "fsl,imx8qxp-lpcg", NULL },
        { /* sentinel */ }
 };
 
@@ -226,6 +364,7 @@ static struct platform_driver imx8qxp_lpcg_clk_driver = {
        .driver = {
                .name = "imx8qxp-lpcg-clk",
                .of_match_table = imx8qxp_lpcg_match,
+               .pm = &imx_clk_lpcg_scu_pm_ops,
                .suppress_bind_attrs = true,
        },
        .probe = imx8qxp_lpcg_clk_probe,
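
Under the new one-cell "fsl,imx8qxp-lpcg" binding, the consumer's clock specifier is the LPCG bit offset itself, and imx_lpcg_of_clk_src_get() maps it to a hws[] slot by dividing by the 4-bit field width. Sketched with illustrative values:

        /* Each LPCG output owns a fixed 4-bit field, so valid offsets are
         * 0, 4, 8, ..., 28 and the slot index is simply offset / 4.
         */
        static unsigned int lpcg_slot(u32 bit_offset)
        {
                return bit_offset / 4;  /* e.g. offset 16 -> slot 4 */
        }
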
index d650ca3..5b3d4ed 100644 (file)
@@ -22,9 +22,10 @@ static int imx8qxp_clk_probe(struct platform_device *pdev)
        struct device_node *ccm_node = pdev->dev.of_node;
        struct clk_hw_onecell_data *clk_data;
        struct clk_hw **clks;
+       u32 clk_cells;
        int ret, i;
 
-       ret = imx_clk_scu_init();
+       ret = imx_clk_scu_init(ccm_node);
        if (ret)
                return ret;
 
@@ -33,6 +34,9 @@ static int imx8qxp_clk_probe(struct platform_device *pdev)
        if (!clk_data)
                return -ENOMEM;
 
+       if (of_property_read_u32(ccm_node, "#clock-cells", &clk_cells))
+               return -EINVAL;
+
        clk_data->num = IMX_SCU_CLK_END;
        clks = clk_data->hws;
 
@@ -55,78 +59,78 @@ static int imx8qxp_clk_probe(struct platform_device *pdev)
        clks[IMX_LSIO_BUS_CLK]          = clk_hw_register_fixed_rate(NULL, "lsio_bus_clk_root", NULL, 0, 100000000);
 
        /* ARM core */
-       clks[IMX_A35_CLK]               = imx_clk_scu("a35_clk", IMX_SC_R_A35, IMX_SC_PM_CLK_CPU);
+       clks[IMX_A35_CLK]               = imx_clk_scu("a35_clk", IMX_SC_R_A35, IMX_SC_PM_CLK_CPU, clk_cells);
 
        /* LSIO SS */
-       clks[IMX_LSIO_PWM0_CLK]         = imx_clk_scu("pwm0_clk", IMX_SC_R_PWM_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_PWM1_CLK]         = imx_clk_scu("pwm1_clk", IMX_SC_R_PWM_1, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_PWM2_CLK]         = imx_clk_scu("pwm2_clk", IMX_SC_R_PWM_2, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_PWM3_CLK]         = imx_clk_scu("pwm3_clk", IMX_SC_R_PWM_3, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_PWM4_CLK]         = imx_clk_scu("pwm4_clk", IMX_SC_R_PWM_4, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_PWM5_CLK]         = imx_clk_scu("pwm5_clk", IMX_SC_R_PWM_5, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_PWM6_CLK]         = imx_clk_scu("pwm6_clk", IMX_SC_R_PWM_6, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_PWM7_CLK]         = imx_clk_scu("pwm7_clk", IMX_SC_R_PWM_7, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_GPT0_CLK]         = imx_clk_scu("gpt0_clk", IMX_SC_R_GPT_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_GPT1_CLK]         = imx_clk_scu("gpt1_clk", IMX_SC_R_GPT_1, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_GPT2_CLK]         = imx_clk_scu("gpt2_clk", IMX_SC_R_GPT_2, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_GPT3_CLK]         = imx_clk_scu("gpt3_clk", IMX_SC_R_GPT_3, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_GPT4_CLK]         = imx_clk_scu("gpt4_clk", IMX_SC_R_GPT_4, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_FSPI0_CLK]        = imx_clk_scu("fspi0_clk", IMX_SC_R_FSPI_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_LSIO_FSPI1_CLK]        = imx_clk_scu("fspi1_clk", IMX_SC_R_FSPI_1, IMX_SC_PM_CLK_PER);
+       clks[IMX_LSIO_PWM0_CLK]         = imx_clk_scu("pwm0_clk", IMX_SC_R_PWM_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_PWM1_CLK]         = imx_clk_scu("pwm1_clk", IMX_SC_R_PWM_1, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_PWM2_CLK]         = imx_clk_scu("pwm2_clk", IMX_SC_R_PWM_2, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_PWM3_CLK]         = imx_clk_scu("pwm3_clk", IMX_SC_R_PWM_3, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_PWM4_CLK]         = imx_clk_scu("pwm4_clk", IMX_SC_R_PWM_4, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_PWM5_CLK]         = imx_clk_scu("pwm5_clk", IMX_SC_R_PWM_5, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_PWM6_CLK]         = imx_clk_scu("pwm6_clk", IMX_SC_R_PWM_6, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_PWM7_CLK]         = imx_clk_scu("pwm7_clk", IMX_SC_R_PWM_7, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_GPT0_CLK]         = imx_clk_scu("gpt0_clk", IMX_SC_R_GPT_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_GPT1_CLK]         = imx_clk_scu("gpt1_clk", IMX_SC_R_GPT_1, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_GPT2_CLK]         = imx_clk_scu("gpt2_clk", IMX_SC_R_GPT_2, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_GPT3_CLK]         = imx_clk_scu("gpt3_clk", IMX_SC_R_GPT_3, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_GPT4_CLK]         = imx_clk_scu("gpt4_clk", IMX_SC_R_GPT_4, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_FSPI0_CLK]        = imx_clk_scu("fspi0_clk", IMX_SC_R_FSPI_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_LSIO_FSPI1_CLK]        = imx_clk_scu("fspi1_clk", IMX_SC_R_FSPI_1, IMX_SC_PM_CLK_PER, clk_cells);
 
        /* ADMA SS */
-       clks[IMX_ADMA_UART0_CLK]        = imx_clk_scu("uart0_clk", IMX_SC_R_UART_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_UART1_CLK]        = imx_clk_scu("uart1_clk", IMX_SC_R_UART_1, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_UART2_CLK]        = imx_clk_scu("uart2_clk", IMX_SC_R_UART_2, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_UART3_CLK]        = imx_clk_scu("uart3_clk", IMX_SC_R_UART_3, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_SPI0_CLK]         = imx_clk_scu("spi0_clk",  IMX_SC_R_SPI_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_SPI1_CLK]         = imx_clk_scu("spi1_clk",  IMX_SC_R_SPI_1, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_SPI2_CLK]         = imx_clk_scu("spi2_clk",  IMX_SC_R_SPI_2, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_SPI3_CLK]         = imx_clk_scu("spi3_clk",  IMX_SC_R_SPI_3, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_CAN0_CLK]         = imx_clk_scu("can0_clk",  IMX_SC_R_CAN_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_I2C0_CLK]         = imx_clk_scu("i2c0_clk",  IMX_SC_R_I2C_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_I2C1_CLK]         = imx_clk_scu("i2c1_clk",  IMX_SC_R_I2C_1, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_I2C2_CLK]         = imx_clk_scu("i2c2_clk",  IMX_SC_R_I2C_2, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_I2C3_CLK]         = imx_clk_scu("i2c3_clk",  IMX_SC_R_I2C_3, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_FTM0_CLK]         = imx_clk_scu("ftm0_clk",  IMX_SC_R_FTM_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_FTM1_CLK]         = imx_clk_scu("ftm1_clk",  IMX_SC_R_FTM_1, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_ADC0_CLK]         = imx_clk_scu("adc0_clk",  IMX_SC_R_ADC_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_PWM_CLK]          = imx_clk_scu("pwm_clk",   IMX_SC_R_LCD_0_PWM_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_ADMA_LCD_CLK]          = imx_clk_scu("lcd_clk",   IMX_SC_R_LCD_0, IMX_SC_PM_CLK_PER);
+       clks[IMX_ADMA_UART0_CLK]        = imx_clk_scu("uart0_clk", IMX_SC_R_UART_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_UART1_CLK]        = imx_clk_scu("uart1_clk", IMX_SC_R_UART_1, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_UART2_CLK]        = imx_clk_scu("uart2_clk", IMX_SC_R_UART_2, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_UART3_CLK]        = imx_clk_scu("uart3_clk", IMX_SC_R_UART_3, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_SPI0_CLK]         = imx_clk_scu("spi0_clk",  IMX_SC_R_SPI_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_SPI1_CLK]         = imx_clk_scu("spi1_clk",  IMX_SC_R_SPI_1, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_SPI2_CLK]         = imx_clk_scu("spi2_clk",  IMX_SC_R_SPI_2, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_SPI3_CLK]         = imx_clk_scu("spi3_clk",  IMX_SC_R_SPI_3, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_CAN0_CLK]         = imx_clk_scu("can0_clk",  IMX_SC_R_CAN_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_I2C0_CLK]         = imx_clk_scu("i2c0_clk",  IMX_SC_R_I2C_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_I2C1_CLK]         = imx_clk_scu("i2c1_clk",  IMX_SC_R_I2C_1, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_I2C2_CLK]         = imx_clk_scu("i2c2_clk",  IMX_SC_R_I2C_2, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_I2C3_CLK]         = imx_clk_scu("i2c3_clk",  IMX_SC_R_I2C_3, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_FTM0_CLK]         = imx_clk_scu("ftm0_clk",  IMX_SC_R_FTM_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_FTM1_CLK]         = imx_clk_scu("ftm1_clk",  IMX_SC_R_FTM_1, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_ADC0_CLK]         = imx_clk_scu("adc0_clk",  IMX_SC_R_ADC_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_PWM_CLK]          = imx_clk_scu("pwm_clk",   IMX_SC_R_LCD_0_PWM_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_ADMA_LCD_CLK]          = imx_clk_scu("lcd_clk",   IMX_SC_R_LCD_0, IMX_SC_PM_CLK_PER, clk_cells);
 
        /* Connectivity */
-       clks[IMX_CONN_SDHC0_CLK]        = imx_clk_scu("sdhc0_clk", IMX_SC_R_SDHC_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_CONN_SDHC1_CLK]        = imx_clk_scu("sdhc1_clk", IMX_SC_R_SDHC_1, IMX_SC_PM_CLK_PER);
-       clks[IMX_CONN_SDHC2_CLK]        = imx_clk_scu("sdhc2_clk", IMX_SC_R_SDHC_2, IMX_SC_PM_CLK_PER);
-       clks[IMX_CONN_ENET0_ROOT_CLK]   = imx_clk_scu("enet0_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_CONN_ENET0_BYPASS_CLK] = imx_clk_scu("enet0_bypass_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_BYPASS);
-       clks[IMX_CONN_ENET0_RGMII_CLK]  = imx_clk_scu("enet0_rgmii_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_MISC0);
-       clks[IMX_CONN_ENET1_ROOT_CLK]   = imx_clk_scu("enet1_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_PER);
-       clks[IMX_CONN_ENET1_BYPASS_CLK] = imx_clk_scu("enet1_bypass_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_BYPASS);
-       clks[IMX_CONN_ENET1_RGMII_CLK]  = imx_clk_scu("enet1_rgmii_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_MISC0);
-       clks[IMX_CONN_GPMI_BCH_IO_CLK]  = imx_clk_scu("gpmi_io_clk", IMX_SC_R_NAND, IMX_SC_PM_CLK_MST_BUS);
-       clks[IMX_CONN_GPMI_BCH_CLK]     = imx_clk_scu("gpmi_bch_clk", IMX_SC_R_NAND, IMX_SC_PM_CLK_PER);
-       clks[IMX_CONN_USB2_ACLK]        = imx_clk_scu("usb3_aclk_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_PER);
-       clks[IMX_CONN_USB2_BUS_CLK]     = imx_clk_scu("usb3_bus_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_MST_BUS);
-       clks[IMX_CONN_USB2_LPM_CLK]     = imx_clk_scu("usb3_lpm_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_MISC);
+       clks[IMX_CONN_SDHC0_CLK]        = imx_clk_scu("sdhc0_clk", IMX_SC_R_SDHC_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_CONN_SDHC1_CLK]        = imx_clk_scu("sdhc1_clk", IMX_SC_R_SDHC_1, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_CONN_SDHC2_CLK]        = imx_clk_scu("sdhc2_clk", IMX_SC_R_SDHC_2, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_CONN_ENET0_ROOT_CLK]   = imx_clk_scu("enet0_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_CONN_ENET0_BYPASS_CLK] = imx_clk_scu("enet0_bypass_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_BYPASS, clk_cells);
+       clks[IMX_CONN_ENET0_RGMII_CLK]  = imx_clk_scu("enet0_rgmii_clk", IMX_SC_R_ENET_0, IMX_SC_PM_CLK_MISC0, clk_cells);
+       clks[IMX_CONN_ENET1_ROOT_CLK]   = imx_clk_scu("enet1_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_CONN_ENET1_BYPASS_CLK] = imx_clk_scu("enet1_bypass_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_BYPASS, clk_cells);
+       clks[IMX_CONN_ENET1_RGMII_CLK]  = imx_clk_scu("enet1_rgmii_clk", IMX_SC_R_ENET_1, IMX_SC_PM_CLK_MISC0, clk_cells);
+       clks[IMX_CONN_GPMI_BCH_IO_CLK]  = imx_clk_scu("gpmi_io_clk", IMX_SC_R_NAND, IMX_SC_PM_CLK_MST_BUS, clk_cells);
+       clks[IMX_CONN_GPMI_BCH_CLK]     = imx_clk_scu("gpmi_bch_clk", IMX_SC_R_NAND, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_CONN_USB2_ACLK]        = imx_clk_scu("usb3_aclk_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_CONN_USB2_BUS_CLK]     = imx_clk_scu("usb3_bus_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_MST_BUS, clk_cells);
+       clks[IMX_CONN_USB2_LPM_CLK]     = imx_clk_scu("usb3_lpm_div", IMX_SC_R_USB_2, IMX_SC_PM_CLK_MISC, clk_cells);
 
        /* Display controller SS */
-       clks[IMX_DC0_DISP0_CLK]         = imx_clk_scu("dc0_disp0_clk", IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC0);
-       clks[IMX_DC0_DISP1_CLK]         = imx_clk_scu("dc0_disp1_clk", IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC1);
+       clks[IMX_DC0_DISP0_CLK]         = imx_clk_scu("dc0_disp0_clk", IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC0, clk_cells);
+       clks[IMX_DC0_DISP1_CLK]         = imx_clk_scu("dc0_disp1_clk", IMX_SC_R_DC_0, IMX_SC_PM_CLK_MISC1, clk_cells);
 
        /* MIPI-LVDS SS */
-       clks[IMX_MIPI0_I2C0_CLK]        = imx_clk_scu("mipi0_i2c0_clk", IMX_SC_R_MIPI_0_I2C_0, IMX_SC_PM_CLK_MISC2);
-       clks[IMX_MIPI0_I2C1_CLK]        = imx_clk_scu("mipi0_i2c1_clk", IMX_SC_R_MIPI_0_I2C_1, IMX_SC_PM_CLK_MISC2);
+       clks[IMX_MIPI0_I2C0_CLK]        = imx_clk_scu("mipi0_i2c0_clk", IMX_SC_R_MIPI_0_I2C_0, IMX_SC_PM_CLK_MISC2, clk_cells);
+       clks[IMX_MIPI0_I2C1_CLK]        = imx_clk_scu("mipi0_i2c1_clk", IMX_SC_R_MIPI_0_I2C_1, IMX_SC_PM_CLK_MISC2, clk_cells);
 
        /* MIPI CSI SS */
-       clks[IMX_CSI0_CORE_CLK]         = imx_clk_scu("mipi_csi0_core_clk", IMX_SC_R_CSI_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_CSI0_ESC_CLK]          = imx_clk_scu("mipi_csi0_esc_clk",  IMX_SC_R_CSI_0, IMX_SC_PM_CLK_MISC);
-       clks[IMX_CSI0_I2C0_CLK]         = imx_clk_scu("mipi_csi0_i2c0_clk", IMX_SC_R_CSI_0_I2C_0, IMX_SC_PM_CLK_PER);
-       clks[IMX_CSI0_PWM0_CLK]         = imx_clk_scu("mipi_csi0_pwm0_clk", IMX_SC_R_CSI_0_PWM_0, IMX_SC_PM_CLK_PER);
+       clks[IMX_CSI0_CORE_CLK]         = imx_clk_scu("mipi_csi0_core_clk", IMX_SC_R_CSI_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_CSI0_ESC_CLK]          = imx_clk_scu("mipi_csi0_esc_clk",  IMX_SC_R_CSI_0, IMX_SC_PM_CLK_MISC, clk_cells);
+       clks[IMX_CSI0_I2C0_CLK]         = imx_clk_scu("mipi_csi0_i2c0_clk", IMX_SC_R_CSI_0_I2C_0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_CSI0_PWM0_CLK]         = imx_clk_scu("mipi_csi0_pwm0_clk", IMX_SC_R_CSI_0_PWM_0, IMX_SC_PM_CLK_PER, clk_cells);
 
        /* GPU SS */
-       clks[IMX_GPU0_CORE_CLK]         = imx_clk_scu("gpu_core0_clk",   IMX_SC_R_GPU_0_PID0, IMX_SC_PM_CLK_PER);
-       clks[IMX_GPU0_SHADER_CLK]       = imx_clk_scu("gpu_shader0_clk", IMX_SC_R_GPU_0_PID0, IMX_SC_PM_CLK_MISC);
+       clks[IMX_GPU0_CORE_CLK]         = imx_clk_scu("gpu_core0_clk",   IMX_SC_R_GPU_0_PID0, IMX_SC_PM_CLK_PER, clk_cells);
+       clks[IMX_GPU0_SHADER_CLK]       = imx_clk_scu("gpu_shader0_clk", IMX_SC_R_GPU_0_PID0, IMX_SC_PM_CLK_MISC, clk_cells);
 
        for (i = 0; i < clk_data->num; i++) {
                if (IS_ERR(clks[i]))
@@ -134,7 +138,19 @@ static int imx8qxp_clk_probe(struct platform_device *pdev)
                                i, PTR_ERR(clks[i]));
        }
 
-       return of_clk_add_hw_provider(ccm_node, of_clk_hw_onecell_get, clk_data);
+       if (clk_cells == 2) {
+               ret = of_clk_add_hw_provider(ccm_node, imx_scu_of_clk_src_get, imx_scu_clks);
+               if (ret)
+                       imx_clk_scu_unregister();
+       } else {
+               /*
+                * The legacy binding code path doesn't unregister its clocks
+                * here because the whole path will be removed later.
+                */
+               ret = of_clk_add_hw_provider(ccm_node, of_clk_hw_onecell_get, clk_data);
+       }
+
+       return ret;
 }
 
 static const struct of_device_id imx8qxp_match[] = {
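
With #clock-cells = <2>, a consumer specifier carries a <resource, clock-type> pair and the provider walks a per-resource list. A sketch of the lookup, mirroring imx_scu_of_clk_src_get() below (the DT fragment in the comment is an assumed example):

        /* DT (assumed): clocks = <&clk IMX_SC_R_UART_0 IMX_SC_PM_CLK_PER>; */
        static struct clk_hw *scu_clk_lookup(struct list_head *scu_clks,
                                             struct of_phandle_args *spec)
        {
                struct imx_scu_clk_node *clk;

                list_for_each_entry(clk, &scu_clks[spec->args[0]], node)
                        if (clk->clk_type == spec->args[1])
                                return clk->hw;

                return ERR_PTR(-ENODEV);
        }
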
index 1f0e44f..77be763 100644 (file)
@@ -34,6 +34,9 @@ struct clk_lpcg_scu {
        void __iomem *reg;
        u8 bit_idx;
        bool hw_gate;
+
+       /* for state save & restore */
+       u32 state;
 };
 
 #define to_clk_lpcg_scu(_hw) container_of(_hw, struct clk_lpcg_scu, hw)
@@ -81,9 +84,9 @@ static const struct clk_ops clk_lpcg_scu_ops = {
        .disable = clk_lpcg_scu_disable,
 };
 
-struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name,
-                               unsigned long flags, void __iomem *reg,
-                               u8 bit_idx, bool hw_gate)
+struct clk_hw *__imx_clk_lpcg_scu(struct device *dev, const char *name,
+                                 const char *parent_name, unsigned long flags,
+                                 void __iomem *reg, u8 bit_idx, bool hw_gate)
 {
        struct clk_lpcg_scu *clk;
        struct clk_init_data init;
@@ -107,11 +110,53 @@ struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name,
        clk->hw.init = &init;
 
        hw = &clk->hw;
-       ret = clk_hw_register(NULL, hw);
+       ret = clk_hw_register(dev, hw);
        if (ret) {
                kfree(clk);
                hw = ERR_PTR(ret);
        }
 
+       if (dev)
+               dev_set_drvdata(dev, clk);
+
        return hw;
 }
+
+void imx_clk_lpcg_scu_unregister(struct clk_hw *hw)
+{
+       struct clk_lpcg_scu *clk = to_clk_lpcg_scu(hw);
+
+       clk_hw_unregister(&clk->hw);
+       kfree(clk);
+}
+
+static int __maybe_unused imx_clk_lpcg_scu_suspend(struct device *dev)
+{
+       struct clk_lpcg_scu *clk = dev_get_drvdata(dev);
+
+       clk->state = readl_relaxed(clk->reg);
+       dev_dbg(dev, "save lpcg state 0x%x\n", clk->state);
+
+       return 0;
+}
+
+static int __maybe_unused imx_clk_lpcg_scu_resume(struct device *dev)
+{
+       struct clk_lpcg_scu *clk = dev_get_drvdata(dev);
+
+       /*
+        * FIXME: Sometimes writes don't work unless the CPU issues
+        * them twice.
+        */
+
+       writel(clk->state, clk->reg);
+       writel(clk->state, clk->reg);
+       dev_dbg(dev, "restore lpcg state 0x%x\n", clk->state);
+
+       return 0;
+}
+
+const struct dev_pm_ops imx_clk_lpcg_scu_pm_ops = {
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_clk_lpcg_scu_suspend,
+                                     imx_clk_lpcg_scu_resume)
+};
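
imx_clk_lpcg_scu_pm_ops is exported (see the clk-scu.h hunk below) so that the LPCG platform driver can hook it up, as the imx8qxp-lpcg change above does. A minimal sketch of that wiring for a hypothetical driver:

        static struct platform_driver foo_lpcg_driver = {
                .driver = {
                        .name   = "foo-lpcg",
                        .pm     = &imx_clk_lpcg_scu_pm_ops, /* noirq save/restore */
                },
                .probe  = foo_lpcg_probe,               /* hypothetical */
        };
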
index aba36e4..2b5ed86 100644 (file)
@@ -416,7 +416,7 @@ struct clk_hw *imx_dev_clk_hw_pll14xx(struct device *dev, const char *name,
                       __func__, name);
                kfree(pll);
                return ERR_PTR(-EINVAL);
-       };
+       }
 
        pll->base = base;
        pll->hw.init = &init;
index b8b2072..1f5518b 100644 (file)
@@ -8,6 +8,10 @@
 #include <linux/arm-smccc.h>
 #include <linux/clk-provider.h>
 #include <linux/err.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 
 #include "clk-scu.h"
 #define IMX_SIP_SET_CPUFREQ            0x00
 
 static struct imx_sc_ipc *ccm_ipc_handle;
+static struct device_node *pd_np;
+static struct platform_driver imx_clk_scu_driver;
+
+struct imx_scu_clk_node {
+       const char *name;
+       u32 rsrc;
+       u8 clk_type;
+       const char * const *parents;
+       int num_parents;
+
+       struct clk_hw *hw;
+       struct list_head node;
+};
+
+struct list_head imx_scu_clks[IMX_SC_R_LAST];
 
 /*
  * struct clk_scu - Description of one SCU clock
@@ -27,6 +46,10 @@ struct clk_scu {
        struct clk_hw hw;
        u16 rsrc_id;
        u8 clk_type;
+
+       /* for state save & restore */
+       bool is_enabled;
+       u32 rate;
 };
 
 /*
@@ -128,9 +151,28 @@ static inline struct clk_scu *to_clk_scu(struct clk_hw *hw)
        return container_of(hw, struct clk_scu, hw);
 }
 
-int imx_clk_scu_init(void)
+int imx_clk_scu_init(struct device_node *np)
 {
-       return imx_scu_get_handle(&ccm_ipc_handle);
+       u32 clk_cells = 0;
+       int ret, i;
+
+       ret = imx_scu_get_handle(&ccm_ipc_handle);
+       if (ret)
+               return ret;
+
+       of_property_read_u32(np, "#clock-cells", &clk_cells);
+
+       if (clk_cells == 2) {
+               for (i = 0; i < IMX_SC_R_LAST; i++)
+                       INIT_LIST_HEAD(&imx_scu_clks[i]);
+
+               /* pd_np will be used to attach power domains later */
+               pd_np = of_find_compatible_node(NULL, NULL, "fsl,scu-pd");
+               if (!pd_np)
+                       return -EINVAL;
+       }
+
+       return platform_driver_register(&imx_clk_scu_driver);
 }
 
 /*
@@ -344,8 +386,9 @@ static const struct clk_ops clk_scu_cpu_ops = {
        .unprepare = clk_scu_unprepare,
 };
 
-struct clk_hw *__imx_clk_scu(const char *name, const char * const *parents,
-                            int num_parents, u32 rsrc_id, u8 clk_type)
+struct clk_hw *__imx_clk_scu(struct device *dev, const char *name,
+                            const char * const *parents, int num_parents,
+                            u32 rsrc_id, u8 clk_type)
 {
        struct clk_init_data init;
        struct clk_scu *clk;
@@ -379,11 +422,185 @@ struct clk_hw *__imx_clk_scu(const char *name, const char * const *parents,
        clk->hw.init = &init;
 
        hw = &clk->hw;
-       ret = clk_hw_register(NULL, hw);
+       ret = clk_hw_register(dev, hw);
        if (ret) {
                kfree(clk);
                hw = ERR_PTR(ret);
        }
 
+       if (dev)
+               dev_set_drvdata(dev, clk);
+
        return hw;
 }
+
+struct clk_hw *imx_scu_of_clk_src_get(struct of_phandle_args *clkspec,
+                                     void *data)
+{
+       unsigned int rsrc = clkspec->args[0];
+       unsigned int idx = clkspec->args[1];
+       struct list_head *scu_clks = data;
+       struct imx_scu_clk_node *clk;
+
+       list_for_each_entry(clk, &scu_clks[rsrc], node) {
+               if (clk->clk_type == idx)
+                       return clk->hw;
+       }
+
+       return ERR_PTR(-ENODEV);
+}
+
+static int imx_clk_scu_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct imx_scu_clk_node *clk = dev_get_platdata(dev);
+       struct clk_hw *hw;
+       int ret;
+
+       pm_runtime_set_suspended(dev);
+       pm_runtime_set_autosuspend_delay(dev, 50);
+       pm_runtime_use_autosuspend(&pdev->dev);
+       pm_runtime_enable(dev);
+
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(dev);
+               pm_runtime_disable(dev);
+               return ret;
+       }
+
+       hw = __imx_clk_scu(dev, clk->name, clk->parents, clk->num_parents,
+                          clk->rsrc, clk->clk_type);
+       if (IS_ERR(hw)) {
+               pm_runtime_disable(dev);
+               return PTR_ERR(hw);
+       }
+
+       clk->hw = hw;
+       list_add_tail(&clk->node, &imx_scu_clks[clk->rsrc]);
+
+       pm_runtime_mark_last_busy(&pdev->dev);
+       pm_runtime_put_autosuspend(&pdev->dev);
+
+       dev_dbg(dev, "register SCU clock rsrc:%d type:%d\n", clk->rsrc,
+               clk->clk_type);
+
+       return 0;
+}
+
+static int __maybe_unused imx_clk_scu_suspend(struct device *dev)
+{
+       struct clk_scu *clk = dev_get_drvdata(dev);
+
+       clk->rate = clk_hw_get_rate(&clk->hw);
+       clk->is_enabled = clk_hw_is_enabled(&clk->hw);
+
+       if (clk->rate)
+               dev_dbg(dev, "save rate %d\n", clk->rate);
+
+       if (clk->is_enabled)
+               dev_dbg(dev, "save enabled state\n");
+
+       return 0;
+}
+
+static int __maybe_unused imx_clk_scu_resume(struct device *dev)
+{
+       struct clk_scu *clk = dev_get_drvdata(dev);
+       int ret = 0;
+
+       if (clk->rate) {
+               ret = clk_scu_set_rate(&clk->hw, clk->rate, 0);
+               dev_dbg(dev, "restore rate %d %s\n", clk->rate,
+                       !ret ? "success" : "failed");
+       }
+
+       if (clk->is_enabled) {
+               ret = clk_scu_prepare(&clk->hw);
+               dev_dbg(dev, "restore enabled state %s\n",
+                       !ret ? "success" : "failed");
+       }
+
+       return ret;
+}
+
+static const struct dev_pm_ops imx_clk_scu_pm_ops = {
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_clk_scu_suspend,
+                                     imx_clk_scu_resume)
+};
+
+static struct platform_driver imx_clk_scu_driver = {
+       .driver = {
+               .name = "imx-scu-clk",
+               .suppress_bind_attrs = true,
+               .pm = &imx_clk_scu_pm_ops,
+       },
+       .probe = imx_clk_scu_probe,
+};
+
+static int imx_clk_scu_attach_pd(struct device *dev, u32 rsrc_id)
+{
+       struct of_phandle_args genpdspec = {
+               .np = pd_np,
+               .args_count = 1,
+               .args[0] = rsrc_id,
+       };
+
+       if (rsrc_id == IMX_SC_R_A35 || rsrc_id == IMX_SC_R_A53 ||
+           rsrc_id == IMX_SC_R_A72)
+               return 0;
+
+       return of_genpd_add_device(&genpdspec, dev);
+}
+
+struct clk_hw *imx_clk_scu_alloc_dev(const char *name,
+                                    const char * const *parents,
+                                    int num_parents, u32 rsrc_id, u8 clk_type)
+{
+       struct imx_scu_clk_node clk = {
+               .name = name,
+               .rsrc = rsrc_id,
+               .clk_type = clk_type,
+               .parents = parents,
+               .num_parents = num_parents,
+       };
+       struct platform_device *pdev;
+       int ret;
+
+       pdev = platform_device_alloc(name, PLATFORM_DEVID_NONE);
+       if (!pdev) {
+               pr_err("%s: failed to allocate scu clk dev rsrc %d type %d\n",
+                      name, rsrc_id, clk_type);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       ret = platform_device_add_data(pdev, &clk, sizeof(clk));
+       if (ret) {
+               platform_device_put(pdev);
+               return ERR_PTR(ret);
+       }
+
+       pdev->driver_override = kstrdup("imx-scu-clk", GFP_KERNEL);
+
+       ret = imx_clk_scu_attach_pd(&pdev->dev, rsrc_id);
+       if (ret)
+               pr_warn("%s: failed to attached the power domain %d\n",
+                       name, ret);
+
+       ret = platform_device_add(pdev);
+       if (ret) {
+               platform_device_put(pdev);
+               return ERR_PTR(ret);
+       }
+
+       /* For API backwards compatibility, simply return NULL for success */
+       return NULL;
+}
+
+void imx_clk_scu_unregister(void)
+{
+       struct imx_scu_clk_node *clk;
+       int i;
+
+       for (i = 0; i < IMX_SC_R_LAST; i++) {
+               list_for_each_entry(clk, &imx_scu_clks[i], node) {
+                       clk_hw_unregister(clk->hw);
+                       kfree(clk);
+               }
+       }
+}
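
Each SCU clock becomes its own platform device so runtime PM and an SCU power domain can be attached per clock; driver_override forces the match to the "imx-scu-clk" driver regardless of the device name. Condensed from the code above (error handling elided):

        pdev = platform_device_alloc("uart0_clk", PLATFORM_DEVID_NONE);
        platform_device_add_data(pdev, &clk, sizeof(clk));      /* becomes pdata */
        pdev->driver_override = kstrdup("imx-scu-clk", GFP_KERNEL);
        of_genpd_add_device(&genpdspec, &pdev->dev);    /* attach power domain */
        platform_device_add(pdev);                      /* probe registers the clk_hw */
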
index 2bcfaf0..e835216 100644 (file)
@@ -8,25 +8,61 @@
 #define __IMX_CLK_SCU_H
 
 #include <linux/firmware/imx/sci.h>
+#include <linux/of.h>
 
-int imx_clk_scu_init(void);
+extern struct list_head imx_scu_clks[];
+extern const struct dev_pm_ops imx_clk_lpcg_scu_pm_ops;
 
-struct clk_hw *__imx_clk_scu(const char *name, const char * const *parents,
-                            int num_parents, u32 rsrc_id, u8 clk_type);
+int imx_clk_scu_init(struct device_node *np);
+struct clk_hw *imx_scu_of_clk_src_get(struct of_phandle_args *clkspec,
+                                     void *data);
+struct clk_hw *imx_clk_scu_alloc_dev(const char *name,
+                                    const char * const *parents,
+                                    int num_parents, u32 rsrc_id, u8 clk_type);
+
+struct clk_hw *__imx_clk_scu(struct device *dev, const char *name,
+                            const char * const *parents, int num_parents,
+                            u32 rsrc_id, u8 clk_type);
+
+void imx_clk_scu_unregister(void);
+
+struct clk_hw *__imx_clk_lpcg_scu(struct device *dev, const char *name,
+                                 const char *parent_name, unsigned long flags,
+                                 void __iomem *reg, u8 bit_idx, bool hw_gate);
+void imx_clk_lpcg_scu_unregister(struct clk_hw *hw);
 
 static inline struct clk_hw *imx_clk_scu(const char *name, u32 rsrc_id,
-                                        u8 clk_type)
+                                        u8 clk_type, u8 clk_cells)
 {
-       return __imx_clk_scu(name, NULL, 0, rsrc_id, clk_type);
+       if (clk_cells == 2)
+               return imx_clk_scu_alloc_dev(name, NULL, 0, rsrc_id, clk_type);
+       else
+               return __imx_clk_scu(NULL, name, NULL, 0, rsrc_id, clk_type);
 }
 
 static inline struct clk_hw *imx_clk_scu2(const char *name, const char * const *parents,
-                                         int num_parents, u32 rsrc_id, u8 clk_type)
+                                         int num_parents, u32 rsrc_id, u8 clk_type,
+                                         u8 clk_cells)
+{
+       if (clk_cells == 2)
+               return imx_clk_scu_alloc_dev(name, parents, num_parents, rsrc_id, clk_type);
+       else
+               return __imx_clk_scu(NULL, name, parents, num_parents, rsrc_id, clk_type);
+}
+
+static inline struct clk_hw *imx_clk_lpcg_scu_dev(struct device *dev, const char *name,
+                                                 const char *parent_name, unsigned long flags,
+                                                 void __iomem *reg, u8 bit_idx, bool hw_gate)
 {
-       return __imx_clk_scu(name, parents, num_parents, rsrc_id, clk_type);
+       return __imx_clk_lpcg_scu(dev, name, parent_name, flags, reg,
+                                 bit_idx, hw_gate);
 }
 
-struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name,
-                               unsigned long flags, void __iomem *reg,
-                               u8 bit_idx, bool hw_gate);
+static inline struct clk_hw *imx_clk_lpcg_scu(const char *name, const char *parent_name,
+                                             unsigned long flags, void __iomem *reg,
+                                             u8 bit_idx, bool hw_gate)
+{
+       return __imx_clk_lpcg_scu(NULL, name, parent_name, flags, reg,
+                                 bit_idx, hw_gate);
+}
 #endif
index 1d7be0c..4f04c82 100644 (file)
@@ -6,8 +6,6 @@
 #include <linux/spinlock.h>
 #include <linux/clk-provider.h>
 
-#define IMX_CLK_GATE2_SINGLE_BIT       1
-
 extern spinlock_t imx_ccm_lock;
 
 void imx_check_clocks(struct clk *clks[], unsigned int count);
@@ -68,9 +66,9 @@ extern struct imx_pll14xx_clk imx_1443x_dram_pll;
        to_clk(imx_clk_hw_cpu(name, parent_name, div, mux, pll, step))
 
 #define clk_register_gate2(dev, name, parent_name, flags, reg, bit_idx, \
-                               cgr_val, clk_gate_flags, lock, share_count) \
+                               cgr_val, cgr_mask, clk_gate_flags, lock, share_count) \
        to_clk(clk_hw_register_gate2(dev, name, parent_name, flags, reg, bit_idx, \
-                               cgr_val, clk_gate_flags, lock, share_count))
+                               cgr_val, cgr_mask, clk_gate_flags, lock, share_count))
 
 #define imx_clk_pllv3(type, name, parent_name, base, div_mask) \
        to_clk(imx_clk_hw_pllv3(type, name, parent_name, base, div_mask))
@@ -198,7 +196,7 @@ struct clk_hw *imx_clk_hw_pllv4(const char *name, const char *parent_name,
 
 struct clk_hw *clk_hw_register_gate2(struct device *dev, const char *name,
                const char *parent_name, unsigned long flags,
-               void __iomem *reg, u8 bit_idx, u8 cgr_val,
+               void __iomem *reg, u8 bit_idx, u8 cgr_val, u8 cgr_mask,
                u8 clk_gate_flags, spinlock_t *lock,
                unsigned int *share_count);
 
@@ -351,14 +349,14 @@ static inline struct clk_hw *imx_clk_hw_gate2(const char *name, const char *pare
                void __iomem *reg, u8 shift)
 {
        return clk_hw_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT, reg,
-                       shift, 0x3, 0, &imx_ccm_lock, NULL);
+                       shift, 0x3, 0x3, 0, &imx_ccm_lock, NULL);
 }
 
 static inline struct clk_hw *imx_clk_hw_gate2_flags(const char *name, const char *parent,
                void __iomem *reg, u8 shift, unsigned long flags)
 {
        return clk_hw_register_gate2(NULL, name, parent, flags | CLK_SET_RATE_PARENT, reg,
-                       shift, 0x3, 0, &imx_ccm_lock, NULL);
+                       shift, 0x3, 0x3, 0, &imx_ccm_lock, NULL);
 }
 
 static inline struct clk_hw *imx_clk_hw_gate2_shared(const char *name,
@@ -366,7 +364,7 @@ static inline struct clk_hw *imx_clk_hw_gate2_shared(const char *name,
                unsigned int *share_count)
 {
        return clk_hw_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT, reg,
-                       shift, 0x3, 0, &imx_ccm_lock, share_count);
+                       shift, 0x3, 0x3, 0, &imx_ccm_lock, share_count);
 }
 
 static inline struct clk_hw *imx_clk_hw_gate2_shared2(const char *name,
@@ -374,7 +372,7 @@ static inline struct clk_hw *imx_clk_hw_gate2_shared2(const char *name,
                unsigned int *share_count)
 {
        return clk_hw_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT |
-                                 CLK_OPS_PARENT_ENABLE, reg, shift, 0x3, 0,
+                                 CLK_OPS_PARENT_ENABLE, reg, shift, 0x3, 0x3, 0,
                                  &imx_ccm_lock, share_count);
 }
 
@@ -384,16 +382,15 @@ static inline struct clk_hw *imx_dev_clk_hw_gate_shared(struct device *dev,
                                unsigned int *share_count)
 {
        return clk_hw_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT |
-                                       CLK_OPS_PARENT_ENABLE, reg, shift, 0x3,
-                                       IMX_CLK_GATE2_SINGLE_BIT,
-                                       &imx_ccm_lock, share_count);
+                                       CLK_OPS_PARENT_ENABLE, reg, shift, 0x1,
+                                       0x1, 0, &imx_ccm_lock, share_count);
 }
 
 static inline struct clk *imx_clk_gate2_cgr(const char *name,
                const char *parent, void __iomem *reg, u8 shift, u8 cgr_val)
 {
        return clk_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT, reg,
-                       shift, cgr_val, 0, &imx_ccm_lock, NULL);
+                       shift, cgr_val, 0x3, 0, &imx_ccm_lock, NULL);
 }
 
 static inline struct clk_hw *imx_clk_hw_gate3(const char *name, const char *parent,
@@ -421,7 +418,7 @@ static inline struct clk_hw *imx_clk_hw_gate4(const char *name, const char *pare
 {
        return clk_hw_register_gate2(NULL, name, parent,
                        CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
-                       reg, shift, 0x3, 0, &imx_ccm_lock, NULL);
+                       reg, shift, 0x3, 0x3, 0, &imx_ccm_lock, NULL);
 }
 
 static inline struct clk_hw *imx_clk_hw_gate4_flags(const char *name,
@@ -430,7 +427,7 @@ static inline struct clk_hw *imx_clk_hw_gate4_flags(const char *name,
 {
        return clk_hw_register_gate2(NULL, name, parent,
                        flags | CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
-                       reg, shift, 0x3, 0, &imx_ccm_lock, NULL);
+                       reg, shift, 0x3, 0x3, 0, &imx_ccm_lock, NULL);
 }
 
 #define imx_clk_gate4_flags(name, parent, reg, shift, flags) \
index dac6edc..c8e9cb6 100644 (file)
@@ -392,15 +392,21 @@ static unsigned int
 ingenic_clk_calc_hw_div(const struct ingenic_cgu_clk_info *clk_info,
                        unsigned int div)
 {
-       unsigned int i;
+       unsigned int i, best_i = 0, best = (unsigned int)-1;
 
        for (i = 0; i < (1 << clk_info->div.bits)
                                && clk_info->div.div_table[i]; i++) {
-               if (clk_info->div.div_table[i] >= div)
-                       return i;
+               if (clk_info->div.div_table[i] >= div &&
+                   clk_info->div.div_table[i] < best) {
+                       best = clk_info->div.div_table[i];
+                       best_i = i;
+
+                       if (div == best)
+                               break;
+               }
        }
 
-       return i - 1;
+       return best_i;
 }
 
 static unsigned
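
The rewrite matters for divider tables that are not sorted ascending: the old loop returned the first entry >= div rather than the closest one. A self-contained illustration of the new behaviour:

        /* table = { 12, 4, 8, 16 }, requested div = 6:
         * old code: index 0 (divider 12, first entry >= 6)
         * new code: index 2 (divider 8, smallest entry >= 6)
         */
        static unsigned int best_div_index(const unsigned int *table,
                                           unsigned int n, unsigned int div)
        {
                unsigned int i, best_i = 0, best = (unsigned int)-1;

                for (i = 0; i < n && table[i]; i++) {
                        if (table[i] >= div && table[i] < best) {
                                best = table[i];
                                best_i = i;
                                if (best == div)
                                        break;
                        }
                }
                return best_i;
        }
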
index 14e127e..dcc1352 100644 (file)
@@ -155,7 +155,7 @@ const struct clk_ops mtk_mux_gate_clr_set_upd_ops = {
        .set_parent = mtk_clk_mux_set_parent_setclr_lock,
 };
 
-struct clk *mtk_clk_register_mux(const struct mtk_mux *mux,
+static struct clk *mtk_clk_register_mux(const struct mtk_mux *mux,
                                 struct regmap *regmap,
                                 spinlock_t *lock)
 {
index f5625f4..8e2f927 100644 (file)
@@ -77,10 +77,6 @@ extern const struct clk_ops mtk_mux_gate_clr_set_upd_ops;
                        _width, _gate, _upd_ofs, _upd,                  \
                        CLK_SET_RATE_PARENT)
 
-struct clk *mtk_clk_register_mux(const struct mtk_mux *mux,
-                                struct regmap *regmap,
-                                spinlock_t *lock);
-
 int mtk_clk_register_muxes(const struct mtk_mux *muxes,
                           int num, struct device_node *node,
                           spinlock_t *lock,
index 034da20..fc002c1 100644 (file)
@@ -58,7 +58,7 @@ config COMMON_CLK_MESON8B
          want peripherals and CPU frequency scaling to work.
 
 config COMMON_CLK_GXBB
-       bool "GXBB and GXL SoC clock controllers support"
+       tristate "GXBB and GXL SoC clock controllers support"
        depends on ARM64
        default y
        select COMMON_CLK_MESON_REGMAP
@@ -74,7 +74,7 @@ config COMMON_CLK_GXBB
          Say Y if you want peripherals and CPU frequency scaling to work.
 
 config COMMON_CLK_AXG
-       bool "AXG SoC clock controllers support"
+       tristate "AXG SoC clock controllers support"
        depends on ARM64
        default y
        select COMMON_CLK_MESON_REGMAP
@@ -100,7 +100,7 @@ config COMMON_CLK_AXG_AUDIO
          aka axg, Say Y if you want audio subsystem to work.
 
 config COMMON_CLK_G12A
-       bool "G12 and SM1 SoC clock controllers support"
+       tristate "G12 and SM1 SoC clock controllers support"
        depends on ARM64
        default y
        select COMMON_CLK_MESON_REGMAP
@@ -110,6 +110,7 @@ config COMMON_CLK_G12A
        select COMMON_CLK_MESON_AO_CLKC
        select COMMON_CLK_MESON_EE_CLKC
        select COMMON_CLK_MESON_CPU_DYNDIV
+       select COMMON_CLK_MESON_VID_PLL_DIV
        select MFD_SYSCON
        help
          Support for the clock controller on Amlogic S905D2, S905X2 and S905Y2
index b488b40..af6db43 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 #include <linux/mfd/syscon.h>
+#include <linux/module.h>
 #include "meson-aoclk.h"
 #include "axg-aoclk.h"
 
@@ -326,6 +327,7 @@ static const struct of_device_id axg_aoclkc_match_table[] = {
        },
        { }
 };
+MODULE_DEVICE_TABLE(of, axg_aoclkc_match_table);
 
 static struct platform_driver axg_aoclkc_driver = {
        .probe          = meson_aoclkc_probe,
@@ -335,4 +337,5 @@ static struct platform_driver axg_aoclkc_driver = {
        },
 };
 
-builtin_platform_driver(axg_aoclkc_driver);
+module_platform_driver(axg_aoclkc_driver);
+MODULE_LICENSE("GPL v2");
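
Converting builtin_platform_driver() to module_platform_driver() only helps if udev can autoload the module, hence the MODULE_DEVICE_TABLE() additions: the table generates the of: module aliases matched against the device's modalias uevent. Sketched with a hypothetical table:

        static const struct of_device_id foo_clkc_match[] = {
                { .compatible = "vendor,foo-clkc" },
                { /* sentinel */ }
        };
        MODULE_DEVICE_TABLE(of, foo_clkc_match);
        /* emits aliases like "of:N*T*Cvendor,foo-clkc" */
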
index 13fc000..0e44695 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/module.h>
 
 #include "clk-regmap.h"
 #include "clk-pll.h"
@@ -1026,6 +1027,743 @@ static struct clk_regmap axg_sd_emmc_c_clk0 = {
        },
 };
 
+/* VPU Clock */
+
+static const struct clk_hw *axg_vpu_parent_hws[] = {
+       &axg_fclk_div4.hw,
+       &axg_fclk_div3.hw,
+       &axg_fclk_div5.hw,
+       &axg_fclk_div7.hw,
+};
+
+static struct clk_regmap axg_vpu_0_sel = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_VPU_CLK_CNTL,
+               .mask = 0x3,
+               .shift = 9,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vpu_0_sel",
+               .ops = &clk_regmap_mux_ops,
+               .parent_hws = axg_vpu_parent_hws,
+               .num_parents = ARRAY_SIZE(axg_vpu_parent_hws),
+               /* We need a specific parent for VPU clock source, let it be set in DT */
+               .flags = CLK_SET_RATE_NO_REPARENT,
+       },
+};
+
+static struct clk_regmap axg_vpu_0_div = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_VPU_CLK_CNTL,
+               .shift = 0,
+               .width = 7,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vpu_0_div",
+               .ops = &clk_regmap_divider_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vpu_0_sel.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+       },
+};
+
+static struct clk_regmap axg_vpu_0 = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VPU_CLK_CNTL,
+               .bit_idx = 8,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vpu_0",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vpu_0_div.hw },
+               .num_parents = 1,
+               /*
+                * We want to avoid CCF to disable the VPU clock if
+                * display has been set by Bootloader
+                */
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vpu_1_sel = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_VPU_CLK_CNTL,
+               .mask = 0x3,
+               .shift = 25,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vpu_1_sel",
+               .ops = &clk_regmap_mux_ops,
+               .parent_hws = axg_vpu_parent_hws,
+               .num_parents = ARRAY_SIZE(axg_vpu_parent_hws),
+               /* We need a specific parent for VPU clock source, let it be set in DT */
+               .flags = CLK_SET_RATE_NO_REPARENT,
+       },
+};
+
+static struct clk_regmap axg_vpu_1_div = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_VPU_CLK_CNTL,
+               .shift = 16,
+               .width = 7,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vpu_1_div",
+               .ops = &clk_regmap_divider_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vpu_1_sel.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+       },
+};
+
+static struct clk_regmap axg_vpu_1 = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VPU_CLK_CNTL,
+               .bit_idx = 24,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vpu_1",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vpu_1_div.hw },
+               .num_parents = 1,
+               /*
+                * Prevent CCF from disabling the VPU clock if the
+                * display has already been set up by the bootloader
+                */
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vpu = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_VPU_CLK_CNTL,
+               .mask = 1,
+               .shift = 31,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vpu",
+               .ops = &clk_regmap_mux_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vpu_0.hw,
+                       &axg_vpu_1.hw
+               },
+               .num_parents = 2,
+               .flags = CLK_SET_RATE_NO_REPARENT,
+       },
+};
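
Each VPU branch above is the usual Amlogic composite: a register mux
(*_sel), a 7-bit divider (*_div) and a gate, all packed into
HHI_VPU_CLK_CNTL, with a final glitch-free mux at bit 31 picking between
the two branches. CLK_SET_RATE_PARENT lets a rate request on the gate
propagate to the divider, while CLK_SET_RATE_NO_REPARENT keeps CCF from
silently switching away from the fclk parent chosen by DT. A minimal
consumer sketch (the con_id and rate are illustrative, not from this
patch):

    struct clk *vpu0 = devm_clk_get(dev, "vpu_0"); /* con_id assumed */

    if (!IS_ERR(vpu0))
            /* propagates gate -> div via CLK_SET_RATE_PARENT; the mux
             * keeps its DT-selected parent (CLK_SET_RATE_NO_REPARENT) */
            clk_set_rate(vpu0, 250000000);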
+
+/* VAPB Clock */
+
+static struct clk_regmap axg_vapb_0_sel = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_VAPBCLK_CNTL,
+               .mask = 0x3,
+               .shift = 9,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vapb_0_sel",
+               .ops = &clk_regmap_mux_ops,
+               .parent_hws = axg_vpu_parent_hws,
+               .num_parents = ARRAY_SIZE(axg_vpu_parent_hws),
+               .flags = CLK_SET_RATE_NO_REPARENT,
+       },
+};
+
+static struct clk_regmap axg_vapb_0_div = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_VAPBCLK_CNTL,
+               .shift = 0,
+               .width = 7,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vapb_0_div",
+               .ops = &clk_regmap_divider_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vapb_0_sel.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+       },
+};
+
+static struct clk_regmap axg_vapb_0 = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VAPBCLK_CNTL,
+               .bit_idx = 8,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vapb_0",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vapb_0_div.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vapb_1_sel = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_VAPBCLK_CNTL,
+               .mask = 0x3,
+               .shift = 25,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vapb_1_sel",
+               .ops = &clk_regmap_mux_ops,
+               .parent_hws = axg_vpu_parent_hws,
+               .num_parents = ARRAY_SIZE(axg_vpu_parent_hws),
+               .flags = CLK_SET_RATE_NO_REPARENT,
+       },
+};
+
+static struct clk_regmap axg_vapb_1_div = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_VAPBCLK_CNTL,
+               .shift = 16,
+               .width = 7,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vapb_1_div",
+               .ops = &clk_regmap_divider_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vapb_1_sel.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+       },
+};
+
+static struct clk_regmap axg_vapb_1 = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VAPBCLK_CNTL,
+               .bit_idx = 24,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vapb_1",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vapb_1_div.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vapb_sel = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_VAPBCLK_CNTL,
+               .mask = 1,
+               .shift = 31,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vapb_sel",
+               .ops = &clk_regmap_mux_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vapb_0.hw,
+                       &axg_vapb_1.hw
+               },
+               .num_parents = 2,
+               .flags = CLK_SET_RATE_NO_REPARENT,
+       },
+};
+
+static struct clk_regmap axg_vapb = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VAPBCLK_CNTL,
+               .bit_idx = 30,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vapb",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vapb_sel.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+/* Video Clocks */
+
+static const struct clk_hw *axg_vclk_parent_hws[] = {
+       &axg_gp0_pll.hw,
+       &axg_fclk_div4.hw,
+       &axg_fclk_div3.hw,
+       &axg_fclk_div5.hw,
+       &axg_fclk_div2.hw,
+       &axg_fclk_div7.hw,
+       &axg_mpll1.hw,
+};
+
+static struct clk_regmap axg_vclk_sel = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_VID_CLK_CNTL,
+               .mask = 0x7,
+               .shift = 16,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk_sel",
+               .ops = &clk_regmap_mux_ops,
+               .parent_hws = axg_vclk_parent_hws,
+               .num_parents = ARRAY_SIZE(axg_vclk_parent_hws),
+               .flags = CLK_SET_RATE_NO_REPARENT | CLK_GET_RATE_NOCACHE,
+       },
+};
+
+static struct clk_regmap axg_vclk2_sel = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_VIID_CLK_CNTL,
+               .mask = 0x7,
+               .shift = 16,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk2_sel",
+               .ops = &clk_regmap_mux_ops,
+               .parent_hws = axg_vclk_parent_hws,
+               .num_parents = ARRAY_SIZE(axg_vclk_parent_hws),
+               .flags = CLK_SET_RATE_NO_REPARENT | CLK_GET_RATE_NOCACHE,
+       },
+};
+
+static struct clk_regmap axg_vclk_input = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VID_CLK_DIV,
+               .bit_idx = 16,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk_input",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk_sel.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk2_input = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VIID_CLK_DIV,
+               .bit_idx = 16,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk2_input",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk2_sel.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk_div = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_VID_CLK_DIV,
+               .shift = 0,
+               .width = 8,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk_div",
+               .ops = &clk_regmap_divider_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vclk_input.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_GET_RATE_NOCACHE,
+       },
+};
+
+static struct clk_regmap axg_vclk2_div = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_VIID_CLK_DIV,
+               .shift = 0,
+               .width = 8,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk2_div",
+               .ops = &clk_regmap_divider_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vclk2_input.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_GET_RATE_NOCACHE,
+       },
+};
+
+static struct clk_regmap axg_vclk = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VID_CLK_CNTL,
+               .bit_idx = 19,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk_div.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk2 = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VIID_CLK_CNTL,
+               .bit_idx = 19,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk2",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk2_div.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk_div1 = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VID_CLK_CNTL,
+               .bit_idx = 0,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk_div1",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk_div2_en = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VID_CLK_CNTL,
+               .bit_idx = 1,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk_div2_en",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk_div4_en = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VID_CLK_CNTL,
+               .bit_idx = 2,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk_div4_en",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk_div6_en = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VID_CLK_CNTL,
+               .bit_idx = 3,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk_div6_en",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk_div12_en = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VID_CLK_CNTL,
+               .bit_idx = 4,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk_div12_en",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk2_div1 = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VIID_CLK_CNTL,
+               .bit_idx = 0,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk2_div1",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk2.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk2_div2_en = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VIID_CLK_CNTL,
+               .bit_idx = 1,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk2_div2_en",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk2.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk2_div4_en = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VIID_CLK_CNTL,
+               .bit_idx = 2,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk2_div4_en",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk2.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk2_div6_en = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VIID_CLK_CNTL,
+               .bit_idx = 3,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk2_div6_en",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk2.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_regmap axg_vclk2_div12_en = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VIID_CLK_CNTL,
+               .bit_idx = 4,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vclk2_div12_en",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) { &axg_vclk2.hw },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+static struct clk_fixed_factor axg_vclk_div2 = {
+       .mult = 1,
+       .div = 2,
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk_div2",
+               .ops = &clk_fixed_factor_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vclk_div2_en.hw
+               },
+               .num_parents = 1,
+       },
+};
+
+static struct clk_fixed_factor axg_vclk_div4 = {
+       .mult = 1,
+       .div = 4,
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk_div4",
+               .ops = &clk_fixed_factor_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vclk_div4_en.hw
+               },
+               .num_parents = 1,
+       },
+};
+
+static struct clk_fixed_factor axg_vclk_div6 = {
+       .mult = 1,
+       .div = 6,
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk_div6",
+               .ops = &clk_fixed_factor_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vclk_div6_en.hw
+               },
+               .num_parents = 1,
+       },
+};
+
+static struct clk_fixed_factor axg_vclk_div12 = {
+       .mult = 1,
+       .div = 12,
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk_div12",
+               .ops = &clk_fixed_factor_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vclk_div12_en.hw
+               },
+               .num_parents = 1,
+       },
+};
+
+static struct clk_fixed_factor axg_vclk2_div2 = {
+       .mult = 1,
+       .div = 2,
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk2_div2",
+               .ops = &clk_fixed_factor_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vclk2_div2_en.hw
+               },
+               .num_parents = 1,
+       },
+};
+
+static struct clk_fixed_factor axg_vclk2_div4 = {
+       .mult = 1,
+       .div = 4,
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk2_div4",
+               .ops = &clk_fixed_factor_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vclk2_div4_en.hw
+               },
+               .num_parents = 1,
+       },
+};
+
+static struct clk_fixed_factor axg_vclk2_div6 = {
+       .mult = 1,
+       .div = 6,
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk2_div6",
+               .ops = &clk_fixed_factor_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vclk2_div6_en.hw
+               },
+               .num_parents = 1,
+       },
+};
+
+static struct clk_fixed_factor axg_vclk2_div12 = {
+       .mult = 1,
+       .div = 12,
+       .hw.init = &(struct clk_init_data){
+               .name = "vclk2_div12",
+               .ops = &clk_fixed_factor_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vclk2_div12_en.hw
+               },
+               .num_parents = 1,
+       },
+};
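
Each fixed video divider tap is modeled as two clocks: a plain gate (the
*_en bit in HHI_VID_CLK_CNTL or HHI_VIID_CLK_CNTL) followed by a
clk_fixed_factor carrying the ratio, because the hardware offers fixed
/2, /4, /6 and /12 taps rather than a programmable field. Taking an
illustrative 594 MHz vclk, ungating vclk_div6_en makes vclk_div6 report
594 / 6 = 99 MHz.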
+
+static u32 mux_table_cts_sel[] = { 0, 1, 2, 3, 4, 8, 9, 10, 11, 12 };
+static const struct clk_hw *axg_cts_parent_hws[] = {
+       &axg_vclk_div1.hw,
+       &axg_vclk_div2.hw,
+       &axg_vclk_div4.hw,
+       &axg_vclk_div6.hw,
+       &axg_vclk_div12.hw,
+       &axg_vclk2_div1.hw,
+       &axg_vclk2_div2.hw,
+       &axg_vclk2_div4.hw,
+       &axg_vclk2_div6.hw,
+       &axg_vclk2_div12.hw,
+};
+
+static struct clk_regmap axg_cts_encl_sel = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_VIID_CLK_DIV,
+               .mask = 0xf,
+               .shift = 12,
+               .table = mux_table_cts_sel,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "cts_encl_sel",
+               .ops = &clk_regmap_mux_ops,
+               .parent_hws = axg_cts_parent_hws,
+               .num_parents = ARRAY_SIZE(axg_cts_parent_hws),
+               .flags = CLK_SET_RATE_NO_REPARENT | CLK_GET_RATE_NOCACHE,
+       },
+};
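
mux_table_cts_sel ties the ten parents to the sparse register encoding:
CCF programs table[i] whenever parent_hws[i] is selected, so the vclk
taps land on field values 0-4 and the vclk2 taps on 8-12, while the
unused encodings 5-7 and 13-15 of the 4-bit field are never written.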
+
+static struct clk_regmap axg_cts_encl = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VID_CLK_CNTL2,
+               .bit_idx = 3,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "cts_encl",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_cts_encl_sel.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+       },
+};
+
+/* VDIN Measure Clock */
+
+static u32 mux_table_axg_vdin_meas[] = { 0, 1, 2, 3, 6, 7 };
+static const struct clk_parent_data axg_vdin_meas_parent_data[] = {
+       { .fw_name = "xtal", },
+       { .hw = &axg_fclk_div4.hw },
+       { .hw = &axg_fclk_div3.hw },
+       { .hw = &axg_fclk_div5.hw },
+       { .hw = &axg_fclk_div2.hw },
+       { .hw = &axg_fclk_div7.hw },
+};
+
+static struct clk_regmap axg_vdin_meas_sel = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_VDIN_MEAS_CLK_CNTL,
+               .mask = 0x7,
+               .shift = 21,
+               .flags = CLK_MUX_ROUND_CLOSEST,
+               .table = mux_table_axg_vdin_meas,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vdin_meas_sel",
+               .ops = &clk_regmap_mux_ops,
+               .parent_data = axg_vdin_meas_parent_data,
+               .num_parents = ARRAY_SIZE(axg_vdin_meas_parent_data),
+               .flags = CLK_SET_RATE_PARENT,
+       },
+};
+
+static struct clk_regmap axg_vdin_meas_div = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_VDIN_MEAS_CLK_CNTL,
+               .shift = 12,
+               .width = 7,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "vdin_meas_div",
+               .ops = &clk_regmap_divider_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vdin_meas_sel.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+       },
+};
+
+static struct clk_regmap axg_vdin_meas = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_VDIN_MEAS_CLK_CNTL,
+               .bit_idx = 20,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "vdin_meas",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &axg_vdin_meas_div.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+       },
+};
+
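Like gen_clk below, the vdin_meas mux mixes both parent forms:
{ .fw_name = "xtal" } defers the crystal parent to whatever DT wires up
under that clock-names entry, while the remaining entries keep direct
clk_hw pointers to the internal fclk dividers; mux_table_axg_vdin_meas
skips the unused field encodings 4 and 5, just as the CTS table above
skips its unused values.
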
 static u32 mux_table_gen_clk[] = { 0, 4, 5, 6, 7, 8,
                                    9, 10, 11, 13, 14, };
 static const struct clk_parent_data gen_clk_parent_data[] = {
@@ -1246,6 +1984,52 @@ static struct clk_hw_onecell_data axg_hw_onecell_data = {
                [CLKID_HIFI_PLL_DCO]            = &axg_hifi_pll_dco.hw,
                [CLKID_PCIE_PLL_DCO]            = &axg_pcie_pll_dco.hw,
                [CLKID_PCIE_PLL_OD]             = &axg_pcie_pll_od.hw,
+               [CLKID_VPU_0_DIV]               = &axg_vpu_0_div.hw,
+               [CLKID_VPU_0_SEL]               = &axg_vpu_0_sel.hw,
+               [CLKID_VPU_0]                   = &axg_vpu_0.hw,
+               [CLKID_VPU_1_DIV]               = &axg_vpu_1_div.hw,
+               [CLKID_VPU_1_SEL]               = &axg_vpu_1_sel.hw,
+               [CLKID_VPU_1]                   = &axg_vpu_1.hw,
+               [CLKID_VPU]                     = &axg_vpu.hw,
+               [CLKID_VAPB_0_DIV]              = &axg_vapb_0_div.hw,
+               [CLKID_VAPB_0_SEL]              = &axg_vapb_0_sel.hw,
+               [CLKID_VAPB_0]                  = &axg_vapb_0.hw,
+               [CLKID_VAPB_1_DIV]              = &axg_vapb_1_div.hw,
+               [CLKID_VAPB_1_SEL]              = &axg_vapb_1_sel.hw,
+               [CLKID_VAPB_1]                  = &axg_vapb_1.hw,
+               [CLKID_VAPB_SEL]                = &axg_vapb_sel.hw,
+               [CLKID_VAPB]                    = &axg_vapb.hw,
+               [CLKID_VCLK]                    = &axg_vclk.hw,
+               [CLKID_VCLK2]                   = &axg_vclk2.hw,
+               [CLKID_VCLK_SEL]                = &axg_vclk_sel.hw,
+               [CLKID_VCLK2_SEL]               = &axg_vclk2_sel.hw,
+               [CLKID_VCLK_INPUT]              = &axg_vclk_input.hw,
+               [CLKID_VCLK2_INPUT]             = &axg_vclk2_input.hw,
+               [CLKID_VCLK_DIV]                = &axg_vclk_div.hw,
+               [CLKID_VCLK2_DIV]               = &axg_vclk2_div.hw,
+               [CLKID_VCLK_DIV2_EN]            = &axg_vclk_div2_en.hw,
+               [CLKID_VCLK_DIV4_EN]            = &axg_vclk_div4_en.hw,
+               [CLKID_VCLK_DIV6_EN]            = &axg_vclk_div6_en.hw,
+               [CLKID_VCLK_DIV12_EN]           = &axg_vclk_div12_en.hw,
+               [CLKID_VCLK2_DIV2_EN]           = &axg_vclk2_div2_en.hw,
+               [CLKID_VCLK2_DIV4_EN]           = &axg_vclk2_div4_en.hw,
+               [CLKID_VCLK2_DIV6_EN]           = &axg_vclk2_div6_en.hw,
+               [CLKID_VCLK2_DIV12_EN]          = &axg_vclk2_div12_en.hw,
+               [CLKID_VCLK_DIV1]               = &axg_vclk_div1.hw,
+               [CLKID_VCLK_DIV2]               = &axg_vclk_div2.hw,
+               [CLKID_VCLK_DIV4]               = &axg_vclk_div4.hw,
+               [CLKID_VCLK_DIV6]               = &axg_vclk_div6.hw,
+               [CLKID_VCLK_DIV12]              = &axg_vclk_div12.hw,
+               [CLKID_VCLK2_DIV1]              = &axg_vclk2_div1.hw,
+               [CLKID_VCLK2_DIV2]              = &axg_vclk2_div2.hw,
+               [CLKID_VCLK2_DIV4]              = &axg_vclk2_div4.hw,
+               [CLKID_VCLK2_DIV6]              = &axg_vclk2_div6.hw,
+               [CLKID_VCLK2_DIV12]             = &axg_vclk2_div12.hw,
+               [CLKID_CTS_ENCL_SEL]            = &axg_cts_encl_sel.hw,
+               [CLKID_CTS_ENCL]                = &axg_cts_encl.hw,
+               [CLKID_VDIN_MEAS_SEL]           = &axg_vdin_meas_sel.hw,
+               [CLKID_VDIN_MEAS_DIV]           = &axg_vdin_meas_div.hw,
+               [CLKID_VDIN_MEAS]               = &axg_vdin_meas.hw,
                [NR_CLKS]                       = NULL,
        },
        .num = NR_CLKS,
@@ -1341,6 +2125,42 @@ static struct clk_regmap *const axg_clk_regmaps[] = {
        &axg_hifi_pll_dco,
        &axg_pcie_pll_dco,
        &axg_pcie_pll_od,
+       &axg_vpu_0_div,
+       &axg_vpu_0_sel,
+       &axg_vpu_0,
+       &axg_vpu_1_div,
+       &axg_vpu_1_sel,
+       &axg_vpu_1,
+       &axg_vpu,
+       &axg_vapb_0_div,
+       &axg_vapb_0_sel,
+       &axg_vapb_0,
+       &axg_vapb_1_div,
+       &axg_vapb_1_sel,
+       &axg_vapb_1,
+       &axg_vapb_sel,
+       &axg_vapb,
+       &axg_vclk,
+       &axg_vclk2,
+       &axg_vclk_sel,
+       &axg_vclk2_sel,
+       &axg_vclk_input,
+       &axg_vclk2_input,
+       &axg_vclk_div,
+       &axg_vclk2_div,
+       &axg_vclk_div2_en,
+       &axg_vclk_div4_en,
+       &axg_vclk_div6_en,
+       &axg_vclk_div12_en,
+       &axg_vclk2_div2_en,
+       &axg_vclk2_div4_en,
+       &axg_vclk2_div6_en,
+       &axg_vclk2_div12_en,
+       &axg_cts_encl_sel,
+       &axg_cts_encl,
+       &axg_vdin_meas_sel,
+       &axg_vdin_meas_div,
+       &axg_vdin_meas,
 };
 
 static const struct meson_eeclkc_data axg_clkc_data = {
@@ -1354,6 +2174,7 @@ static const struct of_device_id clkc_match_table[] = {
        { .compatible = "amlogic,axg-clkc", .data = &axg_clkc_data },
        {}
 };
+MODULE_DEVICE_TABLE(of, clkc_match_table);
 
 static struct platform_driver axg_driver = {
        .probe          = meson_eeclkc_probe,
@@ -1363,4 +2184,5 @@ static struct platform_driver axg_driver = {
        },
 };
 
-builtin_platform_driver(axg_driver);
+module_platform_driver(axg_driver);
+MODULE_LICENSE("GPL v2");
index 0431dab..481b307 100644 (file)
 #define CLKID_HIFI_PLL_DCO                     88
 #define CLKID_PCIE_PLL_DCO                     89
 #define CLKID_PCIE_PLL_OD                      90
+#define CLKID_VPU_0_DIV                                91
+#define CLKID_VPU_1_DIV                                94
+#define CLKID_VAPB_0_DIV                       98
+#define CLKID_VAPB_1_DIV                       101
+#define CLKID_VCLK_SEL                         108
+#define CLKID_VCLK2_SEL                                109
+#define CLKID_VCLK_INPUT                       110
+#define CLKID_VCLK2_INPUT                      111
+#define CLKID_VCLK_DIV                         112
+#define CLKID_VCLK2_DIV                                113
+#define CLKID_VCLK_DIV2_EN                     114
+#define CLKID_VCLK_DIV4_EN                     115
+#define CLKID_VCLK_DIV6_EN                     116
+#define CLKID_VCLK_DIV12_EN                    117
+#define CLKID_VCLK2_DIV2_EN                    118
+#define CLKID_VCLK2_DIV4_EN                    119
+#define CLKID_VCLK2_DIV6_EN                    120
+#define CLKID_VCLK2_DIV12_EN                   121
+#define CLKID_CTS_ENCL_SEL                     132
+#define CLKID_VDIN_MEAS_SEL                    134
+#define CLKID_VDIN_MEAS_DIV                    135
 
-#define NR_CLKS                                        91
+#define NR_CLKS                                        137
 
 /* include the CLKIDs that have been made part of the DT binding */
 #include <dt-bindings/clock/axg-clkc.h>
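
Only the clock IDs that stay private to the driver are added to this
header; the gaps in the numbering (92-93, 95-97, 99-100 and so on) are
the IDs exposed through the dt-bindings header included just above.
NR_CLKS moves to 137 to cover the highest new ID, presumably
CLKID_VDIN_MEAS at 136 in the binding.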
index 6249956..b52990e 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 #include <linux/mfd/syscon.h>
+#include <linux/module.h>
 #include "meson-aoclk.h"
 #include "g12a-aoclk.h"
 
@@ -461,6 +462,7 @@ static const struct of_device_id g12a_aoclkc_match_table[] = {
        },
        { }
 };
+MODULE_DEVICE_TABLE(of, g12a_aoclkc_match_table);
 
 static struct platform_driver g12a_aoclkc_driver = {
        .probe          = meson_aoclkc_probe,
@@ -470,4 +472,5 @@ static struct platform_driver g12a_aoclkc_driver = {
        },
 };
 
-builtin_platform_driver(g12a_aoclkc_driver);
+module_platform_driver(g12a_aoclkc_driver);
+MODULE_LICENSE("GPL v2");
index b814d44..b080359 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
+#include <linux/module.h>
 
 #include "clk-mpll.h"
 #include "clk-pll.h"
@@ -3657,6 +3658,68 @@ static struct clk_regmap g12a_hdmi_tx = {
        },
 };
 
+/* MIPI DSI Host Clocks */
+
+static const struct clk_hw *g12a_mipi_dsi_pxclk_parent_hws[] = {
+       &g12a_vid_pll.hw,
+       &g12a_gp0_pll.hw,
+       &g12a_hifi_pll.hw,
+       &g12a_mpll1.hw,
+       &g12a_fclk_div2.hw,
+       &g12a_fclk_div2p5.hw,
+       &g12a_fclk_div3.hw,
+       &g12a_fclk_div7.hw,
+};
+
+static struct clk_regmap g12a_mipi_dsi_pxclk_sel = {
+       .data = &(struct clk_regmap_mux_data){
+               .offset = HHI_MIPIDSI_PHY_CLK_CNTL,
+               .mask = 0x7,
+               .shift = 12,
+               .flags = CLK_MUX_ROUND_CLOSEST,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "mipi_dsi_pxclk_sel",
+               .ops = &clk_regmap_mux_ops,
+               .parent_hws = g12a_mipi_dsi_pxclk_parent_hws,
+               .num_parents = ARRAY_SIZE(g12a_mipi_dsi_pxclk_parent_hws),
+               .flags = CLK_SET_RATE_NO_REPARENT,
+       },
+};
+
+static struct clk_regmap g12a_mipi_dsi_pxclk_div = {
+       .data = &(struct clk_regmap_div_data){
+               .offset = HHI_MIPIDSI_PHY_CLK_CNTL,
+               .shift = 0,
+               .width = 7,
+       },
+       .hw.init = &(struct clk_init_data){
+               .name = "mipi_dsi_pxclk_div",
+               .ops = &clk_regmap_divider_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &g12a_mipi_dsi_pxclk_sel.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+       },
+};
+
+static struct clk_regmap g12a_mipi_dsi_pxclk = {
+       .data = &(struct clk_regmap_gate_data){
+               .offset = HHI_MIPIDSI_PHY_CLK_CNTL,
+               .bit_idx = 8,
+       },
+       .hw.init = &(struct clk_init_data) {
+               .name = "mipi_dsi_pxclk",
+               .ops = &clk_regmap_gate_ops,
+               .parent_hws = (const struct clk_hw *[]) {
+                       &g12a_mipi_dsi_pxclk_div.hw
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+       },
+};
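
Two flag sets cooperate here: CLK_MUX_ROUND_CLOSEST in the mux register
data makes rate requests that reach the selector resolve to the closest
achievable rate, while CLK_SET_RATE_NO_REPARENT pins the parent choice,
so the DSI pixel rate is dialed in through the 7-bit divider, with
CLK_SET_RATE_PARENT on the gate and divider forwarding requests up the
chain.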
+
 /* HDMI Clocks */
 
 static const struct clk_parent_data g12a_hdmi_parent_data[] = {
@@ -4402,6 +4465,9 @@ static struct clk_hw_onecell_data g12a_hw_onecell_data = {
                [CLKID_SPICC1_SCLK_SEL]         = &g12a_spicc1_sclk_sel.hw,
                [CLKID_SPICC1_SCLK_DIV]         = &g12a_spicc1_sclk_div.hw,
                [CLKID_SPICC1_SCLK]             = &g12a_spicc1_sclk.hw,
+               [CLKID_MIPI_DSI_PXCLK_SEL]      = &g12a_mipi_dsi_pxclk_sel.hw,
+               [CLKID_MIPI_DSI_PXCLK_DIV]      = &g12a_mipi_dsi_pxclk_div.hw,
+               [CLKID_MIPI_DSI_PXCLK]          = &g12a_mipi_dsi_pxclk.hw,
                [NR_CLKS]                       = NULL,
        },
        .num = NR_CLKS,
@@ -4657,6 +4723,9 @@ static struct clk_hw_onecell_data g12b_hw_onecell_data = {
                [CLKID_SPICC1_SCLK_SEL]         = &g12a_spicc1_sclk_sel.hw,
                [CLKID_SPICC1_SCLK_DIV]         = &g12a_spicc1_sclk_div.hw,
                [CLKID_SPICC1_SCLK]             = &g12a_spicc1_sclk.hw,
+               [CLKID_MIPI_DSI_PXCLK_SEL]      = &g12a_mipi_dsi_pxclk_sel.hw,
+               [CLKID_MIPI_DSI_PXCLK_DIV]      = &g12a_mipi_dsi_pxclk_div.hw,
+               [CLKID_MIPI_DSI_PXCLK]          = &g12a_mipi_dsi_pxclk.hw,
                [NR_CLKS]                       = NULL,
        },
        .num = NR_CLKS,
@@ -4903,6 +4972,9 @@ static struct clk_hw_onecell_data sm1_hw_onecell_data = {
                [CLKID_NNA_CORE_CLK_SEL]        = &sm1_nna_core_clk_sel.hw,
                [CLKID_NNA_CORE_CLK_DIV]        = &sm1_nna_core_clk_div.hw,
                [CLKID_NNA_CORE_CLK]            = &sm1_nna_core_clk.hw,
+               [CLKID_MIPI_DSI_PXCLK_SEL]      = &g12a_mipi_dsi_pxclk_sel.hw,
+               [CLKID_MIPI_DSI_PXCLK_DIV]      = &g12a_mipi_dsi_pxclk_div.hw,
+               [CLKID_MIPI_DSI_PXCLK]          = &g12a_mipi_dsi_pxclk.hw,
                [NR_CLKS]                       = NULL,
        },
        .num = NR_CLKS,
@@ -5150,16 +5222,20 @@ static struct clk_regmap *const g12a_clk_regmaps[] = {
        &sm1_nna_core_clk_sel,
        &sm1_nna_core_clk_div,
        &sm1_nna_core_clk,
+       &g12a_mipi_dsi_pxclk_sel,
+       &g12a_mipi_dsi_pxclk_div,
+       &g12a_mipi_dsi_pxclk,
 };
 
 static const struct reg_sequence g12a_init_regs[] = {
        { .reg = HHI_MPLL_CNTL0,        .def = 0x00000543 },
 };
 
-static int meson_g12a_dvfs_setup_common(struct platform_device *pdev,
+#define DVFS_CON_ID "dvfs"
+
+static int meson_g12a_dvfs_setup_common(struct device *dev,
                                        struct clk_hw **hws)
 {
-       const char *notifier_clk_name;
        struct clk *notifier_clk;
        struct clk_hw *xtal;
        int ret;
@@ -5168,21 +5244,22 @@ static int meson_g12a_dvfs_setup_common(struct platform_device *pdev,
 
        /* Setup clock notifier for cpu_clk_postmux0 */
        g12a_cpu_clk_postmux0_nb_data.xtal = xtal;
-       notifier_clk_name = clk_hw_get_name(&g12a_cpu_clk_postmux0.hw);
-       notifier_clk = __clk_lookup(notifier_clk_name);
-       ret = clk_notifier_register(notifier_clk,
-                                   &g12a_cpu_clk_postmux0_nb_data.nb);
+       notifier_clk = devm_clk_hw_get_clk(dev, &g12a_cpu_clk_postmux0.hw,
+                                          DVFS_CON_ID);
+       ret = devm_clk_notifier_register(dev, notifier_clk,
+                                        &g12a_cpu_clk_postmux0_nb_data.nb);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register the cpu_clk_postmux0 notifier\n");
+               dev_err(dev, "failed to register the cpu_clk_postmux0 notifier\n");
                return ret;
        }
 
        /* Setup clock notifier for cpu_clk_dyn mux */
-       notifier_clk_name = clk_hw_get_name(&g12a_cpu_clk_dyn.hw);
-       notifier_clk = __clk_lookup(notifier_clk_name);
-       ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb);
+       notifier_clk = devm_clk_hw_get_clk(dev, &g12a_cpu_clk_dyn.hw,
+                                          DVFS_CON_ID);
+       ret = devm_clk_notifier_register(dev, notifier_clk,
+                                        &g12a_cpu_clk_mux_nb);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register the cpu_clk_dyn notifier\n");
+               dev_err(dev, "failed to register the cpu_clk_dyn notifier\n");
                return ret;
        }
 
@@ -5192,33 +5269,34 @@ static int meson_g12a_dvfs_setup_common(struct platform_device *pdev,
 static int meson_g12b_dvfs_setup(struct platform_device *pdev)
 {
        struct clk_hw **hws = g12b_hw_onecell_data.hws;
-       const char *notifier_clk_name;
+       struct device *dev = &pdev->dev;
        struct clk *notifier_clk;
        struct clk_hw *xtal;
        int ret;
 
-       ret = meson_g12a_dvfs_setup_common(pdev, hws);
+       ret = meson_g12a_dvfs_setup_common(dev, hws);
        if (ret)
                return ret;
 
        xtal = clk_hw_get_parent_by_index(hws[CLKID_CPU_CLK_DYN1_SEL], 0);
 
        /* Setup clock notifier for cpu_clk mux */
-       notifier_clk_name = clk_hw_get_name(&g12b_cpu_clk.hw);
-       notifier_clk = __clk_lookup(notifier_clk_name);
-       ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb);
+       notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpu_clk.hw,
+                                          DVFS_CON_ID);
+       ret = devm_clk_notifier_register(dev, notifier_clk,
+                                        &g12a_cpu_clk_mux_nb);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register the cpu_clk notifier\n");
+               dev_err(dev, "failed to register the cpu_clk notifier\n");
                return ret;
        }
 
        /* Setup clock notifier for sys1_pll */
-       notifier_clk_name = clk_hw_get_name(&g12b_sys1_pll.hw);
-       notifier_clk = __clk_lookup(notifier_clk_name);
-       ret = clk_notifier_register(notifier_clk,
-                                   &g12b_cpu_clk_sys1_pll_nb_data.nb);
+       notifier_clk = devm_clk_hw_get_clk(dev, &g12b_sys1_pll.hw,
+                                          DVFS_CON_ID);
+       ret = devm_clk_notifier_register(dev, notifier_clk,
+                                        &g12b_cpu_clk_sys1_pll_nb_data.nb);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register the sys1_pll notifier\n");
+               dev_err(dev, "failed to register the sys1_pll notifier\n");
                return ret;
        }
 
@@ -5226,40 +5304,39 @@ static int meson_g12b_dvfs_setup(struct platform_device *pdev)
 
        /* Setup clock notifier for cpub_clk_postmux0 */
        g12b_cpub_clk_postmux0_nb_data.xtal = xtal;
-       notifier_clk_name = clk_hw_get_name(&g12b_cpub_clk_postmux0.hw);
-       notifier_clk = __clk_lookup(notifier_clk_name);
-       ret = clk_notifier_register(notifier_clk,
-                                   &g12b_cpub_clk_postmux0_nb_data.nb);
+       notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpub_clk_postmux0.hw,
+                                          DVFS_CON_ID);
+       ret = devm_clk_notifier_register(dev, notifier_clk,
+                                        &g12b_cpub_clk_postmux0_nb_data.nb);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register the cpub_clk_postmux0 notifier\n");
+               dev_err(dev, "failed to register the cpub_clk_postmux0 notifier\n");
                return ret;
        }
 
        /* Setup clock notifier for cpub_clk_dyn mux */
-       notifier_clk_name = clk_hw_get_name(&g12b_cpub_clk_dyn.hw);
-       notifier_clk = __clk_lookup(notifier_clk_name);
-       ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb);
+       notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpub_clk_dyn.hw,
+                                          DVFS_CON_ID);
+       ret = devm_clk_notifier_register(dev, notifier_clk,
+                                        &g12a_cpu_clk_mux_nb);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register the cpub_clk_dyn notifier\n");
+               dev_err(dev, "failed to register the cpub_clk_dyn notifier\n");
                return ret;
        }
 
        /* Setup clock notifier for cpub_clk mux */
-       notifier_clk_name = clk_hw_get_name(&g12b_cpub_clk.hw);
-       notifier_clk = __clk_lookup(notifier_clk_name);
-       ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb);
+       notifier_clk = devm_clk_hw_get_clk(dev, &g12b_cpub_clk.hw, DVFS_CON_ID);
+       ret = devm_clk_notifier_register(dev, notifier_clk,
+                                        &g12a_cpu_clk_mux_nb);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register the cpub_clk notifier\n");
+               dev_err(dev, "failed to register the cpub_clk notifier\n");
                return ret;
        }
 
        /* Setup clock notifier for sys_pll */
-       notifier_clk_name = clk_hw_get_name(&g12a_sys_pll.hw);
-       notifier_clk = __clk_lookup(notifier_clk_name);
-       ret = clk_notifier_register(notifier_clk,
-                                   &g12b_cpub_clk_sys_pll_nb_data.nb);
+       notifier_clk = devm_clk_hw_get_clk(dev, &g12a_sys_pll.hw, DVFS_CON_ID);
+       ret = devm_clk_notifier_register(dev, notifier_clk,
+                                        &g12b_cpub_clk_sys_pll_nb_data.nb);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register the sys_pll notifier\n");
+               dev_err(dev, "failed to register the sys_pll notifier\n");
                return ret;
        }
 
@@ -5269,29 +5346,29 @@ static int meson_g12b_dvfs_setup(struct platform_device *pdev)
 static int meson_g12a_dvfs_setup(struct platform_device *pdev)
 {
        struct clk_hw **hws = g12a_hw_onecell_data.hws;
-       const char *notifier_clk_name;
+       struct device *dev = &pdev->dev;
        struct clk *notifier_clk;
        int ret;
 
-       ret = meson_g12a_dvfs_setup_common(pdev, hws);
+       ret = meson_g12a_dvfs_setup_common(dev, hws);
        if (ret)
                return ret;
 
        /* Setup clock notifier for cpu_clk mux */
-       notifier_clk_name = clk_hw_get_name(&g12a_cpu_clk.hw);
-       notifier_clk = __clk_lookup(notifier_clk_name);
-       ret = clk_notifier_register(notifier_clk, &g12a_cpu_clk_mux_nb);
+       notifier_clk = devm_clk_hw_get_clk(dev, &g12a_cpu_clk.hw, DVFS_CON_ID);
+       ret = devm_clk_notifier_register(dev, notifier_clk,
+                                        &g12a_cpu_clk_mux_nb);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register the cpu_clk notifier\n");
+               dev_err(dev, "failed to register the cpu_clk notifier\n");
                return ret;
        }
 
        /* Setup clock notifier for sys_pll */
-       notifier_clk_name = clk_hw_get_name(&g12a_sys_pll.hw);
-       notifier_clk = __clk_lookup(notifier_clk_name);
-       ret = clk_notifier_register(notifier_clk, &g12a_sys_pll_nb_data.nb);
+       notifier_clk = devm_clk_hw_get_clk(dev, &g12a_sys_pll.hw, DVFS_CON_ID);
+       ret = devm_clk_notifier_register(dev, notifier_clk,
+                                        &g12a_sys_pll_nb_data.nb);
        if (ret) {
-               dev_err(&pdev->dev, "failed to register the sys_pll notifier\n");
+               dev_err(dev, "failed to register the sys_pll notifier\n");
                return ret;
        }
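
The rework in these hunks replaces the fragile lookup-by-global-name
pattern with device-managed handles: devm_clk_hw_get_clk() materializes
a struct clk for one of the driver's own clk_hw objects under the "dvfs"
consumer id, and devm_clk_notifier_register() drops the notifier
automatically on unbind, which is part of what makes building this
driver as a module safe. Condensed (some_hw and some_nb are
placeholders):

    /* before: global name lookup, notifier never unregistered */
    notifier_clk = __clk_lookup(clk_hw_get_name(&some_hw));
    ret = clk_notifier_register(notifier_clk, &some_nb);

    /* after: device-managed, released automatically on driver unbind */
    notifier_clk = devm_clk_hw_get_clk(dev, &some_hw, DVFS_CON_ID);
    ret = devm_clk_notifier_register(dev, notifier_clk, &some_nb);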
 
@@ -5370,6 +5447,7 @@ static const struct of_device_id clkc_match_table[] = {
        },
        {}
 };
+MODULE_DEVICE_TABLE(of, clkc_match_table);
 
 static struct platform_driver g12a_driver = {
        .probe          = meson_g12a_probe,
@@ -5379,4 +5457,5 @@ static struct platform_driver g12a_driver = {
        },
 };
 
-builtin_platform_driver(g12a_driver);
+module_platform_driver(g12a_driver);
+MODULE_LICENSE("GPL v2");
index 69b6a69..a97613d 100644 (file)
 #define CLKID_NNA_AXI_CLK_DIV                  263
 #define CLKID_NNA_CORE_CLK_SEL                 265
 #define CLKID_NNA_CORE_CLK_DIV                 266
+#define CLKID_MIPI_DSI_PXCLK_DIV               268
 
-#define NR_CLKS                                        268
+#define NR_CLKS                                        271
 
 /* include the CLKIDs that have been made part of the DT binding */
 #include <dt-bindings/clock/g12a-clkc.h>
index e940861..fce95cf 100644 (file)
@@ -5,6 +5,7 @@
  */
 #include <linux/platform_device.h>
 #include <linux/mfd/syscon.h>
+#include <linux/module.h>
 #include "meson-aoclk.h"
 #include "gxbb-aoclk.h"
 
@@ -287,6 +288,7 @@ static const struct of_device_id gxbb_aoclkc_match_table[] = {
        },
        { }
 };
+MODULE_DEVICE_TABLE(of, gxbb_aoclkc_match_table);
 
 static struct platform_driver gxbb_aoclkc_driver = {
        .probe          = meson_aoclkc_probe,
@@ -295,4 +297,5 @@ static struct platform_driver gxbb_aoclkc_driver = {
                .of_match_table = gxbb_aoclkc_match_table,
        },
 };
-builtin_platform_driver(gxbb_aoclkc_driver);
+module_platform_driver(gxbb_aoclkc_driver);
+MODULE_LICENSE("GPL v2");
index 0a68af6..d6eed76 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/module.h>
 
 #include "gxbb.h"
 #include "clk-regmap.h"
@@ -3519,6 +3520,7 @@ static const struct of_device_id clkc_match_table[] = {
        { .compatible = "amlogic,gxl-clkc", .data = &gxl_clkc_data },
        {},
 };
+MODULE_DEVICE_TABLE(of, clkc_match_table);
 
 static struct platform_driver gxbb_driver = {
        .probe          = meson_eeclkc_probe,
@@ -3528,4 +3530,5 @@ static struct platform_driver gxbb_driver = {
        },
 };
 
-builtin_platform_driver(gxbb_driver);
+module_platform_driver(gxbb_driver);
+MODULE_LICENSE("GPL v2");
index 3a6d84c..27cd2c1 100644 (file)
@@ -14,6 +14,8 @@
 #include <linux/reset-controller.h>
 #include <linux/mfd/syscon.h>
 #include <linux/of_device.h>
+#include <linux/module.h>
+
 #include <linux/slab.h>
 #include "meson-aoclk.h"
 
@@ -84,3 +86,5 @@ int meson_aoclkc_probe(struct platform_device *pdev)
        return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
                (void *) data->hw_data);
 }
+EXPORT_SYMBOL_GPL(meson_aoclkc_probe);
+MODULE_LICENSE("GPL v2");
index a7cb1e7..8d5a5da 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/platform_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
+#include <linux/module.h>
 
 #include "clk-regmap.h"
 #include "meson-eeclk.h"
@@ -54,3 +55,5 @@ int meson_eeclkc_probe(struct platform_device *pdev)
        return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
                                           data->hw_onecell_data);
 }
+EXPORT_SYMBOL_GPL(meson_eeclkc_probe);
+MODULE_LICENSE("GPL v2");
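
The module conversion repeated across these Amlogic files follows one
pattern: export the shared probe helper, add an OF device table so udev
can autoload the module from the devicetree modalias, and switch
builtin_platform_driver() to module_platform_driver(). Condensed, with
illustrative names:

    static const struct of_device_id foo_clkc_match_table[] = {
            { .compatible = "vendor,foo-clkc", .data = &foo_clkc_data },
            { }
    };
    MODULE_DEVICE_TABLE(of, foo_clkc_match_table); /* OF modalias */

    static struct platform_driver foo_driver = {
            .probe = meson_eeclkc_probe,
            .driver = {
                    .name = "foo-clkc",
                    .of_match_table = foo_clkc_match_table,
            },
    };
    module_platform_driver(foo_driver); /* was builtin_platform_driver() */
    MODULE_LICENSE("GPL v2");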
index e9e306d..4127135 100644 (file)
@@ -13,8 +13,8 @@
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
-#define NB_GPIO1_LATCH 0xC
-#define XTAL_MODE          BIT(31)
+#define NB_GPIO1_LATCH 0x8
+#define XTAL_MODE          BIT(9)
 
 static int armada_3700_xtal_clock_probe(struct platform_device *pdev)
 {
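
The two constants are the whole fix for the Armada 3700 xtal clock: the
board strap that selects the crystal frequency is latched at offset 0x8
of the GPIO1 latch register, bit 9, not offset 0xC bit 31, so the old
values misread which crystal is fitted. The consuming logic stays
untouched and looks roughly like this (sketch; variable names assumed):

    regmap_read(regmap, NB_GPIO1_LATCH, &reg);
    rate = (reg & XTAL_MODE) ? 40000000 : 25000000; /* 40 vs 25 MHz */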
index 3a965bd..d32bb12 100644 (file)
@@ -44,7 +44,7 @@ config QCOM_CLK_APCC_MSM8996
        help
          Support for the CPU clock controller on msm8996 devices.
          Say Y if you want to support CPU clock scaling using CPUfreq
-         drivers for dyanmic power management.
+         drivers for dynamic power management.
 
 config QCOM_CLK_RPM
        tristate "RPM based Clock Controller"
@@ -290,6 +290,15 @@ config QCS_GCC_404
          Say Y if you want to use multimedia devices or peripheral
          devices such as UART, SPI, I2C, USB, SD/eMMC, PCIe etc.
 
+config SC_CAMCC_7180
+       tristate "SC7180 Camera Clock Controller"
+       select SC_GCC_7180
+       help
+         Support for the camera clock controller on Qualcomm Technologies, Inc
+         SC7180 devices.
+         Say Y if you want to support camera devices and functionality such as
+         capturing pictures.
+
 config SC_DISPCC_7180
        tristate "SC7180 Display Clock Controller"
        select SC_GCC_7180
@@ -413,6 +422,14 @@ config SDM_LPASSCC_845
          Say Y if you want to use the LPASS branch clocks of the LPASS clock
          controller to reset the LPASS subsystem.
 
+config SDX_GCC_55
+       tristate "SDX55 Global Clock Controller"
+       select QCOM_GDSC
+       help
+         Support for the global clock controller on SDX55 devices.
+         Say Y if you want to use peripheral devices such as UART,
+         SPI, I2C, USB, SD/UFS, PCIe etc.
+
 config SM_DISPCC_8250
        tristate "SM8150 and SM8250 Display Clock Controller"
        depends on SM_GCC_8150 || SM_GCC_8250
@@ -502,4 +519,10 @@ config KRAITCC
          Support for the Krait CPU clocks on Qualcomm devices.
          Say Y if you want to support CPU frequency scaling.
 
+config CLK_GFM_LPASS_SM8250
+       tristate "SM8250 GFM LPASS Clocks"
+       help
+         Support for the Glitch Free Mux (GFM) clocks of the Low Power
+         Audio Subsystem (LPASS) found on SM8250 SoCs.
+
 endif
index 11ae86f..9e5e0e3 100644 (file)
@@ -19,6 +19,7 @@ clk-qcom-$(CONFIG_QCOM_GDSC) += gdsc.o
 # Keep alphabetically sorted by config
 obj-$(CONFIG_APQ_GCC_8084) += gcc-apq8084.o
 obj-$(CONFIG_APQ_MMCC_8084) += mmcc-apq8084.o
+obj-$(CONFIG_CLK_GFM_LPASS_SM8250) += lpass-gfm-sm8250.o
 obj-$(CONFIG_IPQ_APSS_PLL) += apss-ipq-pll.o
 obj-$(CONFIG_IPQ_APSS_6018) += apss-ipq6018.o
 obj-$(CONFIG_IPQ_GCC_4019) += gcc-ipq4019.o
@@ -51,6 +52,7 @@ obj-$(CONFIG_QCOM_CLK_SMD_RPM) += clk-smd-rpm.o
 obj-$(CONFIG_QCS_GCC_404) += gcc-qcs404.o
 obj-$(CONFIG_QCS_Q6SSTOP_404) += q6sstop-qcs404.o
 obj-$(CONFIG_QCS_TURING_404) += turingcc-qcs404.o
+obj-$(CONFIG_SC_CAMCC_7180) += camcc-sc7180.o
 obj-$(CONFIG_SC_DISPCC_7180) += dispcc-sc7180.o
 obj-$(CONFIG_SC_GCC_7180) += gcc-sc7180.o
 obj-$(CONFIG_SC_GPUCC_7180) += gpucc-sc7180.o
@@ -64,6 +66,7 @@ obj-$(CONFIG_SDM_GCC_845) += gcc-sdm845.o
 obj-$(CONFIG_SDM_GPUCC_845) += gpucc-sdm845.o
 obj-$(CONFIG_SDM_LPASSCC_845) += lpasscc-sdm845.o
 obj-$(CONFIG_SDM_VIDEOCC_845) += videocc-sdm845.o
+obj-$(CONFIG_SDX_GCC_55) += gcc-sdx55.o
 obj-$(CONFIG_SM_DISPCC_8250) += dispcc-sm8250.o
 obj-$(CONFIG_SM_GCC_8150) += gcc-sm8150.o
 obj-$(CONFIG_SM_GCC_8250) += gcc-sm8250.o
diff --git a/drivers/clk/qcom/camcc-sc7180.c b/drivers/clk/qcom/camcc-sc7180.c
new file mode 100644 (file)
index 0000000..dbac565
--- /dev/null
@@ -0,0 +1,1732 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pm_clock.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,camcc-sc7180.h>
+
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-rcg.h"
+#include "clk-regmap.h"
+#include "common.h"
+#include "gdsc.h"
+#include "reset.h"
+
+enum {
+       P_BI_TCXO,
+       P_CAM_CC_PLL0_OUT_EVEN,
+       P_CAM_CC_PLL1_OUT_EVEN,
+       P_CAM_CC_PLL2_OUT_AUX,
+       P_CAM_CC_PLL2_OUT_EARLY,
+       P_CAM_CC_PLL3_OUT_MAIN,
+       P_CORE_BI_PLL_TEST_SE,
+};
+
+static const struct pll_vco agera_vco[] = {
+       { 600000000, 3300000000UL, 0 },
+};
+
+static const struct pll_vco fabia_vco[] = {
+       { 249600000, 2000000000UL, 0 },
+};
+
+/* 600MHz configuration */
+static const struct alpha_pll_config cam_cc_pll0_config = {
+       .l = 0x1f,
+       .alpha = 0x4000,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00002067,
+       .test_ctl_val = 0x40000000,
+       .user_ctl_hi_val = 0x00004805,
+       .user_ctl_val = 0x00000001,
+};
+
+static struct clk_alpha_pll cam_cc_pll0 = {
+       .offset = 0x0,
+       .vco_table = fabia_vco,
+       .num_vco = ARRAY_SIZE(fabia_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_pll0",
+                       .parent_data = &(const struct clk_parent_data){
+                               .fw_name = "bi_tcxo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_fabia_ops,
+               },
+       },
+};
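
The "600MHz configuration" comment checks out against the usual Fabia
rate formula, assuming the customary 19.2 MHz bi_tcxo reference and a
16-bit fractional field:

    rate = ref * (l + alpha / 2^16)
         = 19.2 MHz * (0x1f + 0x4000 / 0x10000)
         = 19.2 MHz * 31.25
         = 600 MHz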
+
+/* 860MHz configuration */
+static const struct alpha_pll_config cam_cc_pll1_config = {
+       .l = 0x2a,
+       .alpha = 0x1555,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00002067,
+       .test_ctl_val = 0x40000000,
+       .user_ctl_hi_val = 0x00004805,
+};
+
+static struct clk_alpha_pll cam_cc_pll1 = {
+       .offset = 0x1000,
+       .vco_table = fabia_vco,
+       .num_vco = ARRAY_SIZE(fabia_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_pll1",
+                       .parent_data = &(const struct clk_parent_data){
+                               .fw_name = "bi_tcxo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_fabia_ops,
+               },
+       },
+};
+
+/* 1920MHz configuration */
+static const struct alpha_pll_config cam_cc_pll2_config = {
+       .l = 0x64,
+       .config_ctl_val = 0x20000800,
+       .config_ctl_hi_val = 0x400003D2,
+       .test_ctl_val = 0x04000400,
+       .test_ctl_hi_val = 0x00004000,
+       .user_ctl_val = 0x0000030F,
+};
+
+static struct clk_alpha_pll cam_cc_pll2 = {
+       .offset = 0x2000,
+       .vco_table = agera_vco,
+       .num_vco = ARRAY_SIZE(agera_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_AGERA],
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_pll2",
+                       .parent_data = &(const struct clk_parent_data){
+                               .fw_name = "bi_tcxo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_agera_ops,
+               },
+       },
+};
+
+static struct clk_fixed_factor cam_cc_pll2_out_early = {
+       .mult = 1,
+       .div = 2,
+       .hw.init = &(struct clk_init_data){
+               .name = "cam_cc_pll2_out_early",
+               .parent_names = (const char *[]){ "cam_cc_pll2" },
+               .num_parents = 1,
+               .ops = &clk_fixed_factor_ops,
+       },
+};
+
+static const struct clk_div_table post_div_table_cam_cc_pll2_out_aux[] = {
+       { 0x3, 4 },
+       { }
+};
+
+static struct clk_alpha_pll_postdiv cam_cc_pll2_out_aux = {
+       .offset = 0x2000,
+       .post_div_shift = 8,
+       .post_div_table = post_div_table_cam_cc_pll2_out_aux,
+       .num_post_div = ARRAY_SIZE(post_div_table_cam_cc_pll2_out_aux),
+       .width = 2,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_AGERA],
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_pll2_out_aux",
+               .parent_data = &(const struct clk_parent_data){
+                       .hw = &cam_cc_pll2.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_alpha_pll_postdiv_ops,
+       },
+};
+
+/* 1080MHz configuration */
+static const struct alpha_pll_config cam_cc_pll3_config = {
+       .l = 0x38,
+       .alpha = 0x4000,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00002067,
+       .test_ctl_val = 0x40000000,
+       .user_ctl_hi_val = 0x00004805,
+};
+
+static struct clk_alpha_pll cam_cc_pll3 = {
+       .offset = 0x3000,
+       .vco_table = fabia_vco,
+       .num_vco = ARRAY_SIZE(fabia_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA],
+       .clkr = {
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_pll3",
+                       .parent_data = &(const struct clk_parent_data){
+                               .fw_name = "bi_tcxo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_fabia_ops,
+               },
+       },
+};
+
+static const struct parent_map cam_cc_parent_map_0[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL1_OUT_EVEN, 2 },
+       { P_CAM_CC_PLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_0[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &cam_cc_pll1.clkr.hw },
+       { .hw = &cam_cc_pll0.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map cam_cc_parent_map_1[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL2_OUT_AUX, 1 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_1[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &cam_cc_pll2_out_aux.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map cam_cc_parent_map_2[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL2_OUT_EARLY, 4 },
+       { P_CAM_CC_PLL3_OUT_MAIN, 5 },
+       { P_CAM_CC_PLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_2[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &cam_cc_pll2_out_early.hw },
+       { .hw = &cam_cc_pll3.clkr.hw },
+       { .hw = &cam_cc_pll0.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map cam_cc_parent_map_3[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL1_OUT_EVEN, 2 },
+       { P_CAM_CC_PLL2_OUT_EARLY, 4 },
+       { P_CAM_CC_PLL3_OUT_MAIN, 5 },
+       { P_CAM_CC_PLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_3[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &cam_cc_pll1.clkr.hw },
+       { .hw = &cam_cc_pll2_out_early.hw },
+       { .hw = &cam_cc_pll3.clkr.hw },
+       { .hw = &cam_cc_pll0.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map cam_cc_parent_map_4[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL3_OUT_MAIN, 5 },
+       { P_CAM_CC_PLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_4[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &cam_cc_pll3.clkr.hw },
+       { .hw = &cam_cc_pll0.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map cam_cc_parent_map_5[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_5[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &cam_cc_pll0.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map cam_cc_parent_map_6[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL1_OUT_EVEN, 2 },
+       { P_CAM_CC_PLL3_OUT_MAIN, 5 },
+       { P_CAM_CC_PLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_6[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &cam_cc_pll1.clkr.hw },
+       { .hw = &cam_cc_pll3.clkr.hw },
+       { .hw = &cam_cc_pll0.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se", .name = "core_bi_pll_test_se" },
+};
+
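+/*
+ * F(rate, src, pre_div, m, n) encodes the half-integer pre-divider
+ * as (2 * pre_div - 1), which is how dividers such as 2.5 below are
+ * representable.  For example, 432 MHz = 1080 MHz (PLL3) / 2.5, and
+ * the divide-by-1 entry implies PLL0's even output runs at 600 MHz.
+ */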
+static const struct freq_tbl ftbl_cam_cc_bps_clk_src[] = {
+       F(200000000, P_CAM_CC_PLL0_OUT_EVEN, 3, 0, 0),
+       F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0),
+       F(432000000, P_CAM_CC_PLL3_OUT_MAIN, 2.5, 0, 0),
+       F(480000000, P_CAM_CC_PLL2_OUT_EARLY, 2, 0, 0),
+       F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
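+/*
+ * mnd_width = 0 means this RCG has no M/N fractional counter and
+ * divides only with the 5-bit half-integer divider; the CCI and MCLK
+ * RCGs below set mnd_width = 8 to enable M/N division.
+ */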
+static struct clk_rcg2 cam_cc_bps_clk_src = {
+       .cmd_rcgr = 0x6010,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_2,
+       .freq_tbl = ftbl_cam_cc_bps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_bps_clk_src",
+               .parent_data = cam_cc_parent_data_2,
+               .num_parents = 5,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_cci_0_clk_src[] = {
+       F(37500000, P_CAM_CC_PLL0_OUT_EVEN, 16, 0, 0),
+       F(50000000, P_CAM_CC_PLL0_OUT_EVEN, 12, 0, 0),
+       F(100000000, P_CAM_CC_PLL0_OUT_EVEN, 6, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_cci_0_clk_src = {
+       .cmd_rcgr = 0xb0d8,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_5,
+       .freq_tbl = ftbl_cam_cc_cci_0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_cci_0_clk_src",
+               .parent_data = cam_cc_parent_data_5,
+               .num_parents = 3,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_cci_1_clk_src = {
+       .cmd_rcgr = 0xb14c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_5,
+       .freq_tbl = ftbl_cam_cc_cci_0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_cci_1_clk_src",
+               .parent_data = cam_cc_parent_data_5,
+               .num_parents = 3,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_cphy_rx_clk_src[] = {
+       F(150000000, P_CAM_CC_PLL0_OUT_EVEN, 4, 0, 0),
+       F(270000000, P_CAM_CC_PLL3_OUT_MAIN, 4, 0, 0),
+       F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_cphy_rx_clk_src = {
+       .cmd_rcgr = 0x9064,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_3,
+       .freq_tbl = ftbl_cam_cc_cphy_rx_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_cphy_rx_clk_src",
+               .parent_data = cam_cc_parent_data_3,
+               .num_parents = 6,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_csi0phytimer_clk_src[] = {
+       F(300000000, P_CAM_CC_PLL0_OUT_EVEN, 2, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_csi0phytimer_clk_src = {
+       .cmd_rcgr = 0x5004,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_csi0phytimer_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_csi1phytimer_clk_src = {
+       .cmd_rcgr = 0x5028,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_csi1phytimer_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_csi2phytimer_clk_src = {
+       .cmd_rcgr = 0x504c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_csi2phytimer_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_csi3phytimer_clk_src = {
+       .cmd_rcgr = 0x5070,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_csi3phytimer_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_fast_ahb_clk_src[] = {
+       F(100000000, P_CAM_CC_PLL0_OUT_EVEN, 6, 0, 0),
+       F(200000000, P_CAM_CC_PLL0_OUT_EVEN, 3, 0, 0),
+       F(300000000, P_CAM_CC_PLL0_OUT_EVEN, 2, 0, 0),
+       F(404000000, P_CAM_CC_PLL1_OUT_EVEN, 2, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_fast_ahb_clk_src = {
+       .cmd_rcgr = 0x603c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_fast_ahb_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_fast_ahb_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_icp_clk_src[] = {
+       F(240000000, P_CAM_CC_PLL0_OUT_EVEN, 2.5, 0, 0),
+       F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0),
+       F(432000000, P_CAM_CC_PLL3_OUT_MAIN, 2.5, 0, 0),
+       F(480000000, P_CAM_CC_PLL2_OUT_EARLY, 2, 0, 0),
+       F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_icp_clk_src = {
+       .cmd_rcgr = 0xb088,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_2,
+       .freq_tbl = ftbl_cam_cc_icp_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_icp_clk_src",
+               .parent_data = cam_cc_parent_data_2,
+               .num_parents = 5,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_ife_0_clk_src[] = {
+       F(240000000, P_CAM_CC_PLL0_OUT_EVEN, 2.5, 0, 0),
+       F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0),
+       F(432000000, P_CAM_CC_PLL3_OUT_MAIN, 2.5, 0, 0),
+       F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_ife_0_clk_src = {
+       .cmd_rcgr = 0x9010,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_4,
+       .freq_tbl = ftbl_cam_cc_ife_0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_ife_0_clk_src",
+               .parent_data = cam_cc_parent_data_4,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_ife_0_csid_clk_src[] = {
+       F(150000000, P_CAM_CC_PLL0_OUT_EVEN, 4, 0, 0),
+       F(270000000, P_CAM_CC_PLL3_OUT_MAIN, 4, 0, 0),
+       F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0),
+       F(480000000, P_CAM_CC_PLL2_OUT_EARLY, 2, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_ife_0_csid_clk_src = {
+       .cmd_rcgr = 0x903c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_3,
+       .freq_tbl = ftbl_cam_cc_ife_0_csid_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_ife_0_csid_clk_src",
+               .parent_data = cam_cc_parent_data_3,
+               .num_parents = 6,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_ife_1_clk_src = {
+       .cmd_rcgr = 0xa010,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_4,
+       .freq_tbl = ftbl_cam_cc_ife_0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_ife_1_clk_src",
+               .parent_data = cam_cc_parent_data_4,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_ife_1_csid_clk_src = {
+       .cmd_rcgr = 0xa034,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_3,
+       .freq_tbl = ftbl_cam_cc_ife_0_csid_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_ife_1_csid_clk_src",
+               .parent_data = cam_cc_parent_data_3,
+               .num_parents = 6,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_ife_lite_clk_src = {
+       .cmd_rcgr = 0xb004,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_4,
+       .freq_tbl = ftbl_cam_cc_ife_0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_ife_lite_clk_src",
+               .parent_data = cam_cc_parent_data_4,
+               .num_parents = 4,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_ife_lite_csid_clk_src = {
+       .cmd_rcgr = 0xb024,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_3,
+       .freq_tbl = ftbl_cam_cc_ife_0_csid_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_ife_lite_csid_clk_src",
+               .parent_data = cam_cc_parent_data_3,
+               .num_parents = 6,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_ipe_0_clk_src[] = {
+       F(240000000, P_CAM_CC_PLL0_OUT_EVEN, 2.5, 0, 0),
+       F(360000000, P_CAM_CC_PLL3_OUT_MAIN, 3, 0, 0),
+       F(432000000, P_CAM_CC_PLL3_OUT_MAIN, 2.5, 0, 0),
+       F(540000000, P_CAM_CC_PLL3_OUT_MAIN, 2, 0, 0),
+       F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_ipe_0_clk_src = {
+       .cmd_rcgr = 0x7010,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_2,
+       .freq_tbl = ftbl_cam_cc_ipe_0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_ipe_0_clk_src",
+               .parent_data = cam_cc_parent_data_2,
+               .num_parents = 5,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_jpeg_clk_src[] = {
+       F(66666667, P_CAM_CC_PLL0_OUT_EVEN, 9, 0, 0),
+       F(133333333, P_CAM_CC_PLL0_OUT_EVEN, 4.5, 0, 0),
+       F(216000000, P_CAM_CC_PLL3_OUT_MAIN, 5, 0, 0),
+       F(320000000, P_CAM_CC_PLL2_OUT_EARLY, 3, 0, 0),
+       F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_jpeg_clk_src = {
+       .cmd_rcgr = 0xb04c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_2,
+       .freq_tbl = ftbl_cam_cc_jpeg_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_jpeg_clk_src",
+               .parent_data = cam_cc_parent_data_2,
+               .num_parents = 5,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_lrme_clk_src[] = {
+       F(200000000, P_CAM_CC_PLL0_OUT_EVEN, 3, 0, 0),
+       F(216000000, P_CAM_CC_PLL3_OUT_MAIN, 5, 0, 0),
+       F(300000000, P_CAM_CC_PLL0_OUT_EVEN, 2, 0, 0),
+       F(404000000, P_CAM_CC_PLL1_OUT_EVEN, 2, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_lrme_clk_src = {
+       .cmd_rcgr = 0xb0f8,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_6,
+       .freq_tbl = ftbl_cam_cc_lrme_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_lrme_clk_src",
+               .parent_data = cam_cc_parent_data_6,
+               .num_parents = 5,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
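+/*
+ * The entries below imply PLL2 runs at 1920 MHz (its /2 "early" tap
+ * feeds the 960 MHz based rates and its /4 "aux" tap the 480 MHz
+ * ones), so 24 MHz = 480 MHz / 10 * (1/2) via the M/N counter and
+ * 64 MHz = 480 MHz / 7.5.
+ */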
+static const struct freq_tbl ftbl_cam_cc_mclk0_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(24000000, P_CAM_CC_PLL2_OUT_AUX, 10, 1, 2),
+       F(64000000, P_CAM_CC_PLL2_OUT_AUX, 7.5, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_mclk0_clk_src = {
+       .cmd_rcgr = 0x4004,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_mclk0_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = 3,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk1_clk_src = {
+       .cmd_rcgr = 0x4024,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_mclk1_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = 3,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk2_clk_src = {
+       .cmd_rcgr = 0x4044,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_mclk2_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = 3,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk3_clk_src = {
+       .cmd_rcgr = 0x4064,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_mclk3_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = 3,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk4_clk_src = {
+       .cmd_rcgr = 0x4084,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_mclk4_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = 3,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_slow_ahb_clk_src[] = {
+       F(80000000, P_CAM_CC_PLL0_OUT_EVEN, 7.5, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_slow_ahb_clk_src = {
+       .cmd_rcgr = 0x6058,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_slow_ahb_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "cam_cc_slow_ahb_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = 4,
+               .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
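+/*
+ * The branch clocks below are simple gates: enable_mask is set in
+ * enable_reg to ungate the clock, and with BRANCH_HALT the status
+ * bit in halt_reg is polled so enable/disable only returns once the
+ * clock has actually toggled.
+ */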
+static struct clk_branch cam_cc_bps_ahb_clk = {
+       .halt_reg = 0x6070,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x6070,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_bps_ahb_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_slow_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_bps_areg_clk = {
+       .halt_reg = 0x6054,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x6054,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_bps_areg_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_fast_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_bps_axi_clk = {
+       .halt_reg = 0x6038,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x6038,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_bps_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_bps_clk = {
+       .halt_reg = 0x6028,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x6028,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_bps_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_bps_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_camnoc_axi_clk = {
+       .halt_reg = 0xb124,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb124,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_camnoc_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cci_0_clk = {
+       .halt_reg = 0xb0f0,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb0f0,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_cci_0_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_cci_0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cci_1_clk = {
+       .halt_reg = 0xb164,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb164,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_cci_1_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_cci_1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
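+/*
+ * BRANCH_HALT_DELAY: unlike the BRANCH_HALT gates above, this branch
+ * has no status bit worth polling, so the framework simply waits a
+ * fixed delay after toggling the enable bit.
+ */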
+static struct clk_branch cam_cc_core_ahb_clk = {
+       .halt_reg = 0xb144,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0xb144,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_core_ahb_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_slow_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cpas_ahb_clk = {
+       .halt_reg = 0xb11c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb11c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_cpas_ahb_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_slow_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csi0phytimer_clk = {
+       .halt_reg = 0x501c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x501c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_csi0phytimer_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_csi0phytimer_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csi1phytimer_clk = {
+       .halt_reg = 0x5040,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x5040,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_csi1phytimer_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_csi1phytimer_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csi2phytimer_clk = {
+       .halt_reg = 0x5064,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x5064,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_csi2phytimer_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_csi2phytimer_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csi3phytimer_clk = {
+       .halt_reg = 0x5088,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x5088,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_csi3phytimer_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_csi3phytimer_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csiphy0_clk = {
+       .halt_reg = 0x5020,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x5020,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_csiphy0_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csiphy1_clk = {
+       .halt_reg = 0x5044,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x5044,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_csiphy1_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csiphy2_clk = {
+       .halt_reg = 0x5068,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x5068,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_csiphy2_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csiphy3_clk = {
+       .halt_reg = 0x508c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x508c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_csiphy3_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_icp_clk = {
+       .halt_reg = 0xb0a0,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb0a0,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_icp_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_icp_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_0_axi_clk = {
+       .halt_reg = 0x9080,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x9080,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_0_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_0_clk = {
+       .halt_reg = 0x9028,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x9028,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_0_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_ife_0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_0_cphy_rx_clk = {
+       .halt_reg = 0x907c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x907c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_0_cphy_rx_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_0_csid_clk = {
+       .halt_reg = 0x9054,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x9054,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_0_csid_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_ife_0_csid_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_0_dsp_clk = {
+       .halt_reg = 0x9038,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x9038,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_0_dsp_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_ife_0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_1_axi_clk = {
+       .halt_reg = 0xa058,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xa058,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_1_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_1_clk = {
+       .halt_reg = 0xa028,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xa028,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_1_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_ife_1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_1_cphy_rx_clk = {
+       .halt_reg = 0xa054,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xa054,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_1_cphy_rx_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_1_csid_clk = {
+       .halt_reg = 0xa04c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xa04c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_1_csid_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_ife_1_csid_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_1_dsp_clk = {
+       .halt_reg = 0xa030,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xa030,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_1_dsp_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_ife_1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_lite_clk = {
+       .halt_reg = 0xb01c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb01c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_lite_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_ife_lite_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_lite_cphy_rx_clk = {
+       .halt_reg = 0xb044,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb044,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_lite_cphy_rx_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_lite_csid_clk = {
+       .halt_reg = 0xb03c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb03c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ife_lite_csid_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_ife_lite_csid_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ipe_0_ahb_clk = {
+       .halt_reg = 0x7040,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x7040,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ipe_0_ahb_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_slow_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ipe_0_areg_clk = {
+       .halt_reg = 0x703c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x703c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ipe_0_areg_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_fast_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ipe_0_axi_clk = {
+       .halt_reg = 0x7038,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x7038,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ipe_0_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ipe_0_clk = {
+       .halt_reg = 0x7028,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x7028,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_ipe_0_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_ipe_0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_jpeg_clk = {
+       .halt_reg = 0xb064,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb064,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_jpeg_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_jpeg_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_lrme_clk = {
+       .halt_reg = 0xb110,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb110,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_lrme_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_lrme_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk0_clk = {
+       .halt_reg = 0x401c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x401c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_mclk0_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_mclk0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk1_clk = {
+       .halt_reg = 0x403c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x403c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_mclk1_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_mclk1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk2_clk = {
+       .halt_reg = 0x405c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x405c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_mclk2_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_mclk2_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk3_clk = {
+       .halt_reg = 0x407c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x407c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_mclk3_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_mclk3_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk4_clk = {
+       .halt_reg = 0x409c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x409c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_mclk4_clk",
+                       .parent_data = &(const struct clk_parent_data){
+                               .hw = &cam_cc_mclk4_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_soc_ahb_clk = {
+       .halt_reg = 0xb140,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb140,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_soc_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_sys_tmr_clk = {
+       .halt_reg = 0xb0a8,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb0a8,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "cam_cc_sys_tmr_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
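+/*
+ * GDSCs are the power domains for the camera blocks.  PWRSTS_OFF_ON
+ * means a domain is either fully off or fully on, and HW_CTRL lets
+ * hardware take over on/off control of the BPS and IPE domains once
+ * they have been enabled.
+ */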
+static struct gdsc bps_gdsc = {
+       .gdscr = 0x6004,
+       .pd = {
+               .name = "bps_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = HW_CTRL,
+};
+
+static struct gdsc ife_0_gdsc = {
+       .gdscr = 0x9004,
+       .pd = {
+               .name = "ife_0_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc ife_1_gdsc = {
+       .gdscr = 0xa004,
+       .pd = {
+               .name = "ife_1_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc ipe_0_gdsc = {
+       .gdscr = 0x7004,
+       .pd = {
+               .name = "ipe_0_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = HW_CTRL,
+};
+
+static struct gdsc titan_top_gdsc = {
+       .gdscr = 0xb134,
+       .pd = {
+               .name = "titan_top_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+};
+
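+/* Non-regmap clocks (the fixed-factor PLL2 early divider) live here. */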
+static struct clk_hw *cam_cc_sc7180_hws[] = {
+       [CAM_CC_PLL2_OUT_EARLY] = &cam_cc_pll2_out_early.hw,
+};
+
+static struct clk_regmap *cam_cc_sc7180_clocks[] = {
+       [CAM_CC_BPS_AHB_CLK] = &cam_cc_bps_ahb_clk.clkr,
+       [CAM_CC_BPS_AREG_CLK] = &cam_cc_bps_areg_clk.clkr,
+       [CAM_CC_BPS_AXI_CLK] = &cam_cc_bps_axi_clk.clkr,
+       [CAM_CC_BPS_CLK] = &cam_cc_bps_clk.clkr,
+       [CAM_CC_BPS_CLK_SRC] = &cam_cc_bps_clk_src.clkr,
+       [CAM_CC_CAMNOC_AXI_CLK] = &cam_cc_camnoc_axi_clk.clkr,
+       [CAM_CC_CCI_0_CLK] = &cam_cc_cci_0_clk.clkr,
+       [CAM_CC_CCI_0_CLK_SRC] = &cam_cc_cci_0_clk_src.clkr,
+       [CAM_CC_CCI_1_CLK] = &cam_cc_cci_1_clk.clkr,
+       [CAM_CC_CCI_1_CLK_SRC] = &cam_cc_cci_1_clk_src.clkr,
+       [CAM_CC_CORE_AHB_CLK] = &cam_cc_core_ahb_clk.clkr,
+       [CAM_CC_CPAS_AHB_CLK] = &cam_cc_cpas_ahb_clk.clkr,
+       [CAM_CC_CPHY_RX_CLK_SRC] = &cam_cc_cphy_rx_clk_src.clkr,
+       [CAM_CC_CSI0PHYTIMER_CLK] = &cam_cc_csi0phytimer_clk.clkr,
+       [CAM_CC_CSI0PHYTIMER_CLK_SRC] = &cam_cc_csi0phytimer_clk_src.clkr,
+       [CAM_CC_CSI1PHYTIMER_CLK] = &cam_cc_csi1phytimer_clk.clkr,
+       [CAM_CC_CSI1PHYTIMER_CLK_SRC] = &cam_cc_csi1phytimer_clk_src.clkr,
+       [CAM_CC_CSI2PHYTIMER_CLK] = &cam_cc_csi2phytimer_clk.clkr,
+       [CAM_CC_CSI2PHYTIMER_CLK_SRC] = &cam_cc_csi2phytimer_clk_src.clkr,
+       [CAM_CC_CSI3PHYTIMER_CLK] = &cam_cc_csi3phytimer_clk.clkr,
+       [CAM_CC_CSI3PHYTIMER_CLK_SRC] = &cam_cc_csi3phytimer_clk_src.clkr,
+       [CAM_CC_CSIPHY0_CLK] = &cam_cc_csiphy0_clk.clkr,
+       [CAM_CC_CSIPHY1_CLK] = &cam_cc_csiphy1_clk.clkr,
+       [CAM_CC_CSIPHY2_CLK] = &cam_cc_csiphy2_clk.clkr,
+       [CAM_CC_CSIPHY3_CLK] = &cam_cc_csiphy3_clk.clkr,
+       [CAM_CC_FAST_AHB_CLK_SRC] = &cam_cc_fast_ahb_clk_src.clkr,
+       [CAM_CC_ICP_CLK] = &cam_cc_icp_clk.clkr,
+       [CAM_CC_ICP_CLK_SRC] = &cam_cc_icp_clk_src.clkr,
+       [CAM_CC_IFE_0_AXI_CLK] = &cam_cc_ife_0_axi_clk.clkr,
+       [CAM_CC_IFE_0_CLK] = &cam_cc_ife_0_clk.clkr,
+       [CAM_CC_IFE_0_CLK_SRC] = &cam_cc_ife_0_clk_src.clkr,
+       [CAM_CC_IFE_0_CPHY_RX_CLK] = &cam_cc_ife_0_cphy_rx_clk.clkr,
+       [CAM_CC_IFE_0_CSID_CLK] = &cam_cc_ife_0_csid_clk.clkr,
+       [CAM_CC_IFE_0_CSID_CLK_SRC] = &cam_cc_ife_0_csid_clk_src.clkr,
+       [CAM_CC_IFE_0_DSP_CLK] = &cam_cc_ife_0_dsp_clk.clkr,
+       [CAM_CC_IFE_1_AXI_CLK] = &cam_cc_ife_1_axi_clk.clkr,
+       [CAM_CC_IFE_1_CLK] = &cam_cc_ife_1_clk.clkr,
+       [CAM_CC_IFE_1_CLK_SRC] = &cam_cc_ife_1_clk_src.clkr,
+       [CAM_CC_IFE_1_CPHY_RX_CLK] = &cam_cc_ife_1_cphy_rx_clk.clkr,
+       [CAM_CC_IFE_1_CSID_CLK] = &cam_cc_ife_1_csid_clk.clkr,
+       [CAM_CC_IFE_1_CSID_CLK_SRC] = &cam_cc_ife_1_csid_clk_src.clkr,
+       [CAM_CC_IFE_1_DSP_CLK] = &cam_cc_ife_1_dsp_clk.clkr,
+       [CAM_CC_IFE_LITE_CLK] = &cam_cc_ife_lite_clk.clkr,
+       [CAM_CC_IFE_LITE_CLK_SRC] = &cam_cc_ife_lite_clk_src.clkr,
+       [CAM_CC_IFE_LITE_CPHY_RX_CLK] = &cam_cc_ife_lite_cphy_rx_clk.clkr,
+       [CAM_CC_IFE_LITE_CSID_CLK] = &cam_cc_ife_lite_csid_clk.clkr,
+       [CAM_CC_IFE_LITE_CSID_CLK_SRC] = &cam_cc_ife_lite_csid_clk_src.clkr,
+       [CAM_CC_IPE_0_AHB_CLK] = &cam_cc_ipe_0_ahb_clk.clkr,
+       [CAM_CC_IPE_0_AREG_CLK] = &cam_cc_ipe_0_areg_clk.clkr,
+       [CAM_CC_IPE_0_AXI_CLK] = &cam_cc_ipe_0_axi_clk.clkr,
+       [CAM_CC_IPE_0_CLK] = &cam_cc_ipe_0_clk.clkr,
+       [CAM_CC_IPE_0_CLK_SRC] = &cam_cc_ipe_0_clk_src.clkr,
+       [CAM_CC_JPEG_CLK] = &cam_cc_jpeg_clk.clkr,
+       [CAM_CC_JPEG_CLK_SRC] = &cam_cc_jpeg_clk_src.clkr,
+       [CAM_CC_LRME_CLK] = &cam_cc_lrme_clk.clkr,
+       [CAM_CC_LRME_CLK_SRC] = &cam_cc_lrme_clk_src.clkr,
+       [CAM_CC_MCLK0_CLK] = &cam_cc_mclk0_clk.clkr,
+       [CAM_CC_MCLK0_CLK_SRC] = &cam_cc_mclk0_clk_src.clkr,
+       [CAM_CC_MCLK1_CLK] = &cam_cc_mclk1_clk.clkr,
+       [CAM_CC_MCLK1_CLK_SRC] = &cam_cc_mclk1_clk_src.clkr,
+       [CAM_CC_MCLK2_CLK] = &cam_cc_mclk2_clk.clkr,
+       [CAM_CC_MCLK2_CLK_SRC] = &cam_cc_mclk2_clk_src.clkr,
+       [CAM_CC_MCLK3_CLK] = &cam_cc_mclk3_clk.clkr,
+       [CAM_CC_MCLK3_CLK_SRC] = &cam_cc_mclk3_clk_src.clkr,
+       [CAM_CC_MCLK4_CLK] = &cam_cc_mclk4_clk.clkr,
+       [CAM_CC_MCLK4_CLK_SRC] = &cam_cc_mclk4_clk_src.clkr,
+       [CAM_CC_PLL0] = &cam_cc_pll0.clkr,
+       [CAM_CC_PLL1] = &cam_cc_pll1.clkr,
+       [CAM_CC_PLL2] = &cam_cc_pll2.clkr,
+       [CAM_CC_PLL2_OUT_AUX] = &cam_cc_pll2_out_aux.clkr,
+       [CAM_CC_PLL3] = &cam_cc_pll3.clkr,
+       [CAM_CC_SLOW_AHB_CLK_SRC] = &cam_cc_slow_ahb_clk_src.clkr,
+       [CAM_CC_SOC_AHB_CLK] = &cam_cc_soc_ahb_clk.clkr,
+       [CAM_CC_SYS_TMR_CLK] = &cam_cc_sys_tmr_clk.clkr,
+};
+
+static struct gdsc *cam_cc_sc7180_gdscs[] = {
+       [BPS_GDSC] = &bps_gdsc,
+       [IFE_0_GDSC] = &ife_0_gdsc,
+       [IFE_1_GDSC] = &ife_1_gdsc,
+       [IPE_0_GDSC] = &ipe_0_gdsc,
+       [TITAN_TOP_GDSC] = &titan_top_gdsc,
+};
+
+static const struct regmap_config cam_cc_sc7180_regmap_config = {
+       .reg_bits = 32,
+       .reg_stride = 4,
+       .val_bits = 32,
+       .max_register = 0xd028,
+       .fast_io = true,
+};
+
+static const struct qcom_cc_desc cam_cc_sc7180_desc = {
+       .config = &cam_cc_sc7180_regmap_config,
+       .clk_hws = cam_cc_sc7180_hws,
+       .num_clk_hws = ARRAY_SIZE(cam_cc_sc7180_hws),
+       .clks = cam_cc_sc7180_clocks,
+       .num_clks = ARRAY_SIZE(cam_cc_sc7180_clocks),
+       .gdscs = cam_cc_sc7180_gdscs,
+       .num_gdscs = ARRAY_SIZE(cam_cc_sc7180_gdscs),
+};
+
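+/*
+ * Sketch of consumer usage (illustrative only): the CAM_CC_* indices
+ * above come from include/dt-bindings/clock/qcom,camcc-sc7180.h, so
+ * a camera sensor node might carry
+ *
+ *     clocks = <&camcc CAM_CC_MCLK0_CLK>;
+ *     clock-names = "mclk";
+ *
+ * and its driver would then do, e.g.:
+ *
+ *     mclk = devm_clk_get(dev, "mclk");
+ *     clk_set_rate(mclk, 24000000);
+ *     clk_prepare_enable(mclk);
+ */
+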
+static const struct of_device_id cam_cc_sc7180_match_table[] = {
+       { .compatible = "qcom,sc7180-camcc" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, cam_cc_sc7180_match_table);
+
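+/*
+ * An illustrative provider node for this compatible (placeholders,
+ * not authoritative -- see the qcom,sc7180-camcc binding for the
+ * required properties):
+ *
+ *     camcc: clock-controller@ad00000 {
+ *             compatible = "qcom,sc7180-camcc";
+ *             reg = <0 0x0ad00000 0 0x10000>;
+ *             clocks = <&rpmhcc RPMH_CXO_CLK>;
+ *             clock-names = "bi_tcxo";
+ *             #clock-cells = <1>;
+ *     };
+ */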
+static int cam_cc_sc7180_probe(struct platform_device *pdev)
+{
+       struct regmap *regmap;
+       int ret;
+
+       pm_runtime_enable(&pdev->dev);
+       ret = pm_clk_create(&pdev->dev);
+       if (ret < 0)
+               goto disable_pm_runtime;
+
+       ret = pm_clk_add(&pdev->dev, "xo");
+       if (ret < 0) {
+               dev_err(&pdev->dev, "Failed to acquire XO clock\n");
+               goto disable_pm_runtime;
+       }
+
+       ret = pm_clk_add(&pdev->dev, "iface");
+       if (ret < 0) {
+               dev_err(&pdev->dev, "Failed to acquire iface clock\n");
+               goto disable_pm_runtime;
+       }
+
+       ret = pm_runtime_get(&pdev->dev);
+       if (ret < 0)
+               goto destroy_pm_clk;
+
+       regmap = qcom_cc_map(pdev, &cam_cc_sc7180_desc);
+       if (IS_ERR(regmap)) {
+               ret = PTR_ERR(regmap);
+               pm_runtime_put(&pdev->dev);
+               goto destroy_pm_clk;
+       }
+
+       clk_fabia_pll_configure(&cam_cc_pll0, regmap, &cam_cc_pll0_config);
+       clk_fabia_pll_configure(&cam_cc_pll1, regmap, &cam_cc_pll1_config);
+       clk_agera_pll_configure(&cam_cc_pll2, regmap, &cam_cc_pll2_config);
+       clk_fabia_pll_configure(&cam_cc_pll3, regmap, &cam_cc_pll3_config);
+
+       ret = qcom_cc_really_probe(pdev, &cam_cc_sc7180_desc, regmap);
+       pm_runtime_put(&pdev->dev);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "Failed to register CAM CC clocks\n");
+               goto destroy_pm_clk;
+       }
+
+       return 0;
+
+destroy_pm_clk:
+       pm_clk_destroy(&pdev->dev);
+
+disable_pm_runtime:
+       pm_runtime_disable(&pdev->dev);
+
+       return ret;
+}
+
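+/*
+ * pm_clk_suspend()/pm_clk_resume() gate the "xo" and "iface" clocks
+ * registered in probe, so the controller is only clocked while its
+ * device is runtime-active.
+ */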
+static const struct dev_pm_ops cam_cc_pm_ops = {
+       SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL)
+};
+
+static struct platform_driver cam_cc_sc7180_driver = {
+       .probe = cam_cc_sc7180_probe,
+       .driver = {
+               .name = "cam_cc-sc7180",
+               .of_match_table = cam_cc_sc7180_match_table,
+               .pm = &cam_cc_pm_ops,
+       },
+};
+
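+/*
+ * Registered at subsys_initcall rather than module_init so that a
+ * built-in provider is available before camera consumer drivers
+ * probe.
+ */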
+static int __init cam_cc_sc7180_init(void)
+{
+       return platform_driver_register(&cam_cc_sc7180_driver);
+}
+subsys_initcall(cam_cc_sc7180_init);
+
+static void __exit cam_cc_sc7180_exit(void)
+{
+       platform_driver_unregister(&cam_cc_sc7180_driver);
+}
+module_exit(cam_cc_sc7180_exit);
+
+MODULE_DESCRIPTION("QTI CAM_CC SC7180 Driver");
+MODULE_LICENSE("GPL v2");
index 5644311..21c357c 100644 (file)
@@ -116,6 +116,16 @@ const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = {
                [PLL_OFF_OPMODE] = 0x38,
                [PLL_OFF_ALPHA_VAL] = 0x40,
        },
+       [CLK_ALPHA_PLL_TYPE_AGERA] = {
+               [PLL_OFF_L_VAL] = 0x04,
+               [PLL_OFF_ALPHA_VAL] = 0x08,
+               [PLL_OFF_USER_CTL] = 0x0c,
+               [PLL_OFF_CONFIG_CTL] = 0x10,
+               [PLL_OFF_CONFIG_CTL_U] = 0x14,
+               [PLL_OFF_TEST_CTL] = 0x18,
+               [PLL_OFF_TEST_CTL_U] = 0x1c,
+               [PLL_OFF_STATUS] = 0x2c,
+       },
 };
 EXPORT_SYMBOL_GPL(clk_alpha_pll_regs);
 
@@ -207,6 +217,13 @@ static int wait_for_pll(struct clk_alpha_pll *pll, u32 mask, bool inverse,
 #define wait_for_pll_update_ack_clear(pll) \
        wait_for_pll(pll, ALPHA_PLL_ACK_LATCH, 1, "update_ack_clear")
 
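+/* Skip the write and keep the register default when the value is 0 */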
+static void clk_alpha_pll_write_config(struct regmap *regmap, unsigned int reg,
+                                       unsigned int val)
+{
+       if (val)
+               regmap_write(regmap, reg, val);
+}
+
 void clk_alpha_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
                             const struct alpha_pll_config *config)
 {
@@ -1004,33 +1021,19 @@ void clk_fabia_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
 {
        u32 val, mask;
 
-       if (config->l)
-               regmap_write(regmap, PLL_L_VAL(pll), config->l);
-
-       if (config->alpha)
-               regmap_write(regmap, PLL_FRAC(pll), config->alpha);
-
-       if (config->config_ctl_val)
-               regmap_write(regmap, PLL_CONFIG_CTL(pll),
+       clk_alpha_pll_write_config(regmap, PLL_L_VAL(pll), config->l);
+       clk_alpha_pll_write_config(regmap, PLL_FRAC(pll), config->alpha);
+       clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL(pll),
                                                config->config_ctl_val);
-
-       if (config->config_ctl_hi_val)
-               regmap_write(regmap, PLL_CONFIG_CTL_U(pll),
+       clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL_U(pll),
                                                config->config_ctl_hi_val);
-
-       if (config->user_ctl_val)
-               regmap_write(regmap, PLL_USER_CTL(pll), config->user_ctl_val);
-
-       if (config->user_ctl_hi_val)
-               regmap_write(regmap, PLL_USER_CTL_U(pll),
+       clk_alpha_pll_write_config(regmap, PLL_USER_CTL(pll),
+                                               config->user_ctl_val);
+       clk_alpha_pll_write_config(regmap, PLL_USER_CTL_U(pll),
                                                config->user_ctl_hi_val);
-
-       if (config->test_ctl_val)
-               regmap_write(regmap, PLL_TEST_CTL(pll),
+       clk_alpha_pll_write_config(regmap, PLL_TEST_CTL(pll),
                                                config->test_ctl_val);
-
-       if (config->test_ctl_hi_val)
-               regmap_write(regmap, PLL_TEST_CTL_U(pll),
+       clk_alpha_pll_write_config(regmap, PLL_TEST_CTL_U(pll),
                                                config->test_ctl_hi_val);
 
        if (config->post_div_mask) {
@@ -1145,25 +1148,38 @@ static unsigned long alpha_pll_fabia_recalc_rate(struct clk_hw *hw,
        return alpha_pll_calc_rate(parent_rate, l, frac, alpha_width);
 }
 
+/*
+ * Due to a limited number of bits for fractional rate programming, the
+ * rounded-up rate could be marginally higher than the requested rate.
+ */
+static int alpha_pll_check_rate_margin(struct clk_hw *hw,
+                       unsigned long rrate, unsigned long rate)
+{
+       unsigned long rate_margin = rate + PLL_RATE_MARGIN;
+
+       if (rrate > rate_margin || rrate < rate) {
+               pr_err("%s: Rounded rate %lu not within range [%lu, %lu)\n",
+                      clk_hw_get_name(hw), rrate, rate, rate_margin);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
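A worked example of the new check (illustrative, and assuming PLL_RATE_MARGIN is 500). Note that rrate == rate + PLL_RATE_MARGIN still passes, even though the error message prints a half-open range:

	ret = alpha_pll_check_rate_margin(hw, 100000400, 100000000); /* 0: within margin */
	ret = alpha_pll_check_rate_margin(hw,  99999999, 100000000); /* -EINVAL: below request */
	ret = alpha_pll_check_rate_margin(hw, 100000501, 100000000); /* -EINVAL: above margin */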
 static int alpha_pll_fabia_set_rate(struct clk_hw *hw, unsigned long rate,
                                                unsigned long prate)
 {
        struct clk_alpha_pll *pll = to_clk_alpha_pll(hw);
        u32 l, alpha_width = pll_alpha_width(pll);
+       unsigned long rrate;
+       int ret;
        u64 a;
-       unsigned long rrate, max = rate + PLL_RATE_MARGIN;
 
        rrate = alpha_pll_round_rate(rate, prate, &l, &a, alpha_width);
 
-       /*
-        * Due to limited number of bits for fractional rate programming, the
-        * rounded up rate could be marginally higher than the requested rate.
-        */
-       if (rrate > (rate + PLL_RATE_MARGIN) || rrate < rate) {
-               pr_err("%s: Rounded rate %lu not within range [%lu, %lu)\n",
-                      clk_hw_get_name(hw), rrate, rate, max);
-               return -EINVAL;
-       }
+       ret = alpha_pll_check_rate_margin(hw, rrate, rate);
+       if (ret < 0)
+               return ret;
 
        regmap_write(pll->clkr.regmap, PLL_L_VAL(pll), l);
        regmap_write(pll->clkr.regmap, PLL_FRAC(pll), a);
@@ -1206,12 +1222,10 @@ static int alpha_pll_fabia_prepare(struct clk_hw *hw)
 
        rrate = alpha_pll_round_rate(cal_freq, clk_hw_get_rate(parent_hw),
                                        &cal_l, &a, alpha_width);
-       /*
-        * Due to a limited number of bits for fractional rate programming, the
-        * rounded up rate could be marginally higher than the requested rate.
-        */
-       if (rrate > (cal_freq + PLL_RATE_MARGIN) || rrate < cal_freq)
-               return -EINVAL;
+
+       ret = alpha_pll_check_rate_margin(hw, rrate, cal_freq);
+       if (ret < 0)
+               return ret;
 
        /* Setup PLL for calibration frequency */
        regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL(pll), cal_l);
@@ -1388,49 +1402,27 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_fabia_ops);
 void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
                             const struct alpha_pll_config *config)
 {
-       if (config->l)
-               regmap_write(regmap, PLL_L_VAL(pll), config->l);
-
+       clk_alpha_pll_write_config(regmap, PLL_L_VAL(pll), config->l);
        regmap_write(regmap, PLL_CAL_L_VAL(pll), TRION_PLL_CAL_VAL);
-
-       if (config->alpha)
-               regmap_write(regmap, PLL_ALPHA_VAL(pll), config->alpha);
-
-       if (config->config_ctl_val)
-               regmap_write(regmap, PLL_CONFIG_CTL(pll),
-                            config->config_ctl_val);
-
-       if (config->config_ctl_hi_val)
-               regmap_write(regmap, PLL_CONFIG_CTL_U(pll),
-                            config->config_ctl_hi_val);
-
-       if (config->config_ctl_hi1_val)
-               regmap_write(regmap, PLL_CONFIG_CTL_U1(pll),
-                            config->config_ctl_hi1_val);
-
-       if (config->user_ctl_val)
-               regmap_write(regmap, PLL_USER_CTL(pll),
-                            config->user_ctl_val);
-
-       if (config->user_ctl_hi_val)
-               regmap_write(regmap, PLL_USER_CTL_U(pll),
-                            config->user_ctl_hi_val);
-
-       if (config->user_ctl_hi1_val)
-               regmap_write(regmap, PLL_USER_CTL_U1(pll),
-                            config->user_ctl_hi1_val);
-
-       if (config->test_ctl_val)
-               regmap_write(regmap, PLL_TEST_CTL(pll),
-                            config->test_ctl_val);
-
-       if (config->test_ctl_hi_val)
-               regmap_write(regmap, PLL_TEST_CTL_U(pll),
-                            config->test_ctl_hi_val);
-
-       if (config->test_ctl_hi1_val)
-               regmap_write(regmap, PLL_TEST_CTL_U1(pll),
-                            config->test_ctl_hi1_val);
+       clk_alpha_pll_write_config(regmap, PLL_ALPHA_VAL(pll), config->alpha);
+       clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL(pll),
+                                    config->config_ctl_val);
+       clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL_U(pll),
+                                    config->config_ctl_hi_val);
+       clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL_U1(pll),
+                                    config->config_ctl_hi1_val);
+       clk_alpha_pll_write_config(regmap, PLL_USER_CTL(pll),
+                                       config->user_ctl_val);
+       clk_alpha_pll_write_config(regmap, PLL_USER_CTL_U(pll),
+                                       config->user_ctl_hi_val);
+       clk_alpha_pll_write_config(regmap, PLL_USER_CTL_U1(pll),
+                                       config->user_ctl_hi1_val);
+       clk_alpha_pll_write_config(regmap, PLL_TEST_CTL(pll),
+                                       config->test_ctl_val);
+       clk_alpha_pll_write_config(regmap, PLL_TEST_CTL_U(pll),
+                                       config->test_ctl_hi_val);
+       clk_alpha_pll_write_config(regmap, PLL_TEST_CTL_U1(pll),
+                                       config->test_ctl_hi1_val);
 
        regmap_update_bits(regmap, PLL_MODE(pll), PLL_UPDATE_BYPASS,
                           PLL_UPDATE_BYPASS);
@@ -1490,14 +1482,9 @@ static int alpha_pll_trion_set_rate(struct clk_hw *hw, unsigned long rate,
 
        rrate = alpha_pll_round_rate(rate, prate, &l, &a, alpha_width);
 
-       /*
-        * Due to a limited number of bits for fractional rate programming, the
-        * rounded up rate could be marginally higher than the requested rate.
-        */
-       if (rrate > (rate + PLL_RATE_MARGIN) || rrate < rate) {
-               pr_err("Call set rate on the PLL with rounded rates!\n");
-               return -EINVAL;
-       }
+       ret = alpha_pll_check_rate_margin(hw, rrate, rate);
+       if (ret < 0)
+               return ret;
 
        regmap_write(pll->clkr.regmap, PLL_L_VAL(pll), l);
        regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL(pll), a);
@@ -1561,3 +1548,55 @@ const struct clk_ops clk_alpha_pll_postdiv_lucid_ops = {
        .set_rate = clk_alpha_pll_postdiv_fabia_set_rate,
 };
 EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_lucid_ops);
+
+void clk_agera_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
+                       const struct alpha_pll_config *config)
+{
+       clk_alpha_pll_write_config(regmap, PLL_L_VAL(pll), config->l);
+       clk_alpha_pll_write_config(regmap, PLL_ALPHA_VAL(pll), config->alpha);
+       clk_alpha_pll_write_config(regmap, PLL_USER_CTL(pll),
+                                                       config->user_ctl_val);
+       clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL(pll),
+                                               config->config_ctl_val);
+       clk_alpha_pll_write_config(regmap, PLL_CONFIG_CTL_U(pll),
+                                               config->config_ctl_hi_val);
+       clk_alpha_pll_write_config(regmap, PLL_TEST_CTL(pll),
+                                               config->test_ctl_val);
+       clk_alpha_pll_write_config(regmap, PLL_TEST_CTL_U(pll),
+                                               config->test_ctl_hi_val);
+}
+EXPORT_SYMBOL_GPL(clk_agera_pll_configure);
+
+static int clk_alpha_pll_agera_set_rate(struct clk_hw *hw, unsigned long rate,
+                                                       unsigned long prate)
+{
+       struct clk_alpha_pll *pll = to_clk_alpha_pll(hw);
+       u32 l, alpha_width = pll_alpha_width(pll);
+       int ret;
+       unsigned long rrate;
+       u64 a;
+
+       rrate = alpha_pll_round_rate(rate, prate, &l, &a, alpha_width);
+       ret = alpha_pll_check_rate_margin(hw, rrate, rate);
+       if (ret < 0)
+               return ret;
+
+       /* Change L_VAL without having to go through the power-on sequence */
+       regmap_write(pll->clkr.regmap, PLL_L_VAL(pll), l);
+       regmap_write(pll->clkr.regmap, PLL_ALPHA_VAL(pll), a);
+
+       if (clk_hw_is_enabled(hw))
+               return wait_for_pll_enable_lock(pll);
+
+       return 0;
+}
+
+const struct clk_ops clk_alpha_pll_agera_ops = {
+       .enable = clk_alpha_pll_enable,
+       .disable = clk_alpha_pll_disable,
+       .is_enabled = clk_alpha_pll_is_enabled,
+       .recalc_rate = alpha_pll_fabia_recalc_rate,
+       .round_rate = clk_alpha_pll_round_rate,
+       .set_rate = clk_alpha_pll_agera_set_rate,
+};
+EXPORT_SYMBOL_GPL(clk_alpha_pll_agera_ops);
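A hypothetical consumer of the new PLL type, modeled on the Lucid PLL definitions elsewhere in this series (the name and offset below are illustrative, not from the patch):

	static struct clk_alpha_pll example_agera_pll = {
		.offset = 0x0,
		.regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_AGERA],
		.clkr.hw.init = &(struct clk_init_data){
			.name = "example_agera_pll",
			.parent_data = &(const struct clk_parent_data){
				.fw_name = "bi_tcxo",
			},
			.num_parents = 1,
			.ops = &clk_alpha_pll_agera_ops,
		},
	};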
index d3201b8..0ea30d2 100644
@@ -15,6 +15,7 @@ enum {
        CLK_ALPHA_PLL_TYPE_FABIA,
        CLK_ALPHA_PLL_TYPE_TRION,
        CLK_ALPHA_PLL_TYPE_LUCID = CLK_ALPHA_PLL_TYPE_TRION,
+       CLK_ALPHA_PLL_TYPE_AGERA,
        CLK_ALPHA_PLL_TYPE_MAX,
 };
 
@@ -141,6 +142,7 @@ extern const struct clk_ops clk_alpha_pll_postdiv_trion_ops;
 extern const struct clk_ops clk_alpha_pll_lucid_ops;
 #define clk_alpha_pll_fixed_lucid_ops clk_alpha_pll_fixed_trion_ops
 extern const struct clk_ops clk_alpha_pll_postdiv_lucid_ops;
+extern const struct clk_ops clk_alpha_pll_agera_ops;
 
 void clk_alpha_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
                             const struct alpha_pll_config *config);
@@ -148,6 +150,8 @@ void clk_fabia_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
                                const struct alpha_pll_config *config);
 void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
                             const struct alpha_pll_config *config);
+void clk_agera_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap,
+                               const struct alpha_pll_config *config);
 #define clk_lucid_pll_configure(pll, regmap, config) \
        clk_trion_pll_configure(pll, regmap, config)
 
index e2c669b..6a2a13c 100644
@@ -349,6 +349,7 @@ DEFINE_CLK_RPMH_VRM(sdm845, rf_clk2, rf_clk2_ao, "rfclka2", 1);
 DEFINE_CLK_RPMH_VRM(sdm845, rf_clk3, rf_clk3_ao, "rfclka3", 1);
 DEFINE_CLK_RPMH_VRM(sm8150, rf_clk3, rf_clk3_ao, "rfclka3", 1);
 DEFINE_CLK_RPMH_BCM(sdm845, ipa, "IP0");
+DEFINE_CLK_RPMH_BCM(sdm845, ce, "CE0");
 
 static struct clk_hw *sdm845_rpmh_clocks[] = {
        [RPMH_CXO_CLK]          = &sdm845_bi_tcxo.hw,
@@ -364,6 +365,7 @@ static struct clk_hw *sdm845_rpmh_clocks[] = {
        [RPMH_RF_CLK3]          = &sdm845_rf_clk3.hw,
        [RPMH_RF_CLK3_A]        = &sdm845_rf_clk3_ao.hw,
        [RPMH_IPA_CLK]          = &sdm845_ipa.hw,
+       [RPMH_CE_CLK]           = &sdm845_ce.hw,
 };
 
 static const struct clk_rpmh_desc clk_rpmh_sdm845 = {
@@ -371,6 +373,25 @@ static const struct clk_rpmh_desc clk_rpmh_sdm845 = {
        .num_clks = ARRAY_SIZE(sdm845_rpmh_clocks),
 };
 
+DEFINE_CLK_RPMH_VRM(sdx55, rf_clk1, rf_clk1_ao, "rfclkd1", 1);
+DEFINE_CLK_RPMH_VRM(sdx55, rf_clk2, rf_clk2_ao, "rfclkd2", 1);
+DEFINE_CLK_RPMH_BCM(sdx55, qpic_clk, "QP0");
+
+static struct clk_hw *sdx55_rpmh_clocks[] = {
+       [RPMH_CXO_CLK]          = &sdm845_bi_tcxo.hw,
+       [RPMH_CXO_CLK_A]        = &sdm845_bi_tcxo_ao.hw,
+       [RPMH_RF_CLK1]          = &sdx55_rf_clk1.hw,
+       [RPMH_RF_CLK1_A]        = &sdx55_rf_clk1_ao.hw,
+       [RPMH_RF_CLK2]          = &sdx55_rf_clk2.hw,
+       [RPMH_RF_CLK2_A]        = &sdx55_rf_clk2_ao.hw,
+       [RPMH_QPIC_CLK]         = &sdx55_qpic_clk.hw,
+};
+
+static const struct clk_rpmh_desc clk_rpmh_sdx55 = {
+       .clks = sdx55_rpmh_clocks,
+       .num_clks = ARRAY_SIZE(sdx55_rpmh_clocks),
+};
+
 static struct clk_hw *sm8150_rpmh_clocks[] = {
        [RPMH_CXO_CLK]          = &sdm845_bi_tcxo.hw,
        [RPMH_CXO_CLK_A]        = &sdm845_bi_tcxo_ao.hw,
@@ -432,6 +453,39 @@ static const struct clk_rpmh_desc clk_rpmh_sm8250 = {
        .num_clks = ARRAY_SIZE(sm8250_rpmh_clocks),
 };
 
+DEFINE_CLK_RPMH_VRM(sm8350, div_clk1, div_clk1_ao, "divclka1", 2);
+DEFINE_CLK_RPMH_VRM(sm8350, rf_clk4, rf_clk4_ao, "rfclka4", 1);
+DEFINE_CLK_RPMH_VRM(sm8350, rf_clk5, rf_clk5_ao, "rfclka5", 1);
+DEFINE_CLK_RPMH_BCM(sm8350, pka, "PKA0");
+DEFINE_CLK_RPMH_BCM(sm8350, hwkm, "HK0");
+
+static struct clk_hw *sm8350_rpmh_clocks[] = {
+       [RPMH_CXO_CLK]          = &sdm845_bi_tcxo.hw,
+       [RPMH_CXO_CLK_A]        = &sdm845_bi_tcxo_ao.hw,
+       [RPMH_DIV_CLK1]         = &sm8350_div_clk1.hw,
+       [RPMH_DIV_CLK1_A]       = &sm8350_div_clk1_ao.hw,
+       [RPMH_LN_BB_CLK1]       = &sm8250_ln_bb_clk1.hw,
+       [RPMH_LN_BB_CLK1_A]     = &sm8250_ln_bb_clk1_ao.hw,
+       [RPMH_LN_BB_CLK2]       = &sdm845_ln_bb_clk2.hw,
+       [RPMH_LN_BB_CLK2_A]     = &sdm845_ln_bb_clk2_ao.hw,
+       [RPMH_RF_CLK1]          = &sdm845_rf_clk1.hw,
+       [RPMH_RF_CLK1_A]        = &sdm845_rf_clk1_ao.hw,
+       [RPMH_RF_CLK3]          = &sdm845_rf_clk3.hw,
+       [RPMH_RF_CLK3_A]        = &sdm845_rf_clk3_ao.hw,
+       [RPMH_RF_CLK4]          = &sm8350_rf_clk4.hw,
+       [RPMH_RF_CLK4_A]        = &sm8350_rf_clk4_ao.hw,
+       [RPMH_RF_CLK5]          = &sm8350_rf_clk5.hw,
+       [RPMH_RF_CLK5_A]        = &sm8350_rf_clk5_ao.hw,
+       [RPMH_IPA_CLK]          = &sdm845_ipa.hw,
+       [RPMH_PKA_CLK]          = &sm8350_pka.hw,
+       [RPMH_HWKM_CLK]         = &sm8350_hwkm.hw,
+};
+
+static const struct clk_rpmh_desc clk_rpmh_sm8350 = {
+       .clks = sm8350_rpmh_clocks,
+       .num_clks = ARRAY_SIZE(sm8350_rpmh_clocks),
+};
+
 static struct clk_hw *of_clk_rpmh_hw_get(struct of_phandle_args *clkspec,
                                         void *data)
 {
@@ -517,8 +571,10 @@ static int clk_rpmh_probe(struct platform_device *pdev)
 static const struct of_device_id clk_rpmh_match_table[] = {
        { .compatible = "qcom,sc7180-rpmh-clk", .data = &clk_rpmh_sc7180},
        { .compatible = "qcom,sdm845-rpmh-clk", .data = &clk_rpmh_sdm845},
+       { .compatible = "qcom,sdx55-rpmh-clk",  .data = &clk_rpmh_sdx55},
        { .compatible = "qcom,sm8150-rpmh-clk", .data = &clk_rpmh_sm8150},
        { .compatible = "qcom,sm8250-rpmh-clk", .data = &clk_rpmh_sm8250},
+       { .compatible = "qcom,sm8350-rpmh-clk", .data = &clk_rpmh_sm8350},
        { }
 };
 MODULE_DEVICE_TABLE(of, clk_rpmh_match_table);
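Sketch of how the new SDX55 entry is resolved (illustrative; of_clk_rpmh_hw_get() above is the of_clk_add_hw_provider() callback and, per the usual pattern, indexes .clks with the consumer's clock-cell value):

	/* DT consumer:  clocks = <&rpmhcc RPMH_QPIC_CLK>;  resolves to: */
	struct clk_hw *hw = clk_rpmh_sdx55.clks[RPMH_QPIC_CLK]; /* &sdx55_qpic_clk.hw */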
index 07a98d3..588575e 100644
@@ -963,6 +963,7 @@ static struct gdsc mdss_gdsc = {
        },
        .pwrsts = PWRSTS_OFF_ON,
        .flags = HW_CTRL,
+       .supply = "mmcx",
 };
 
 static struct clk_regmap *disp_cc_sm8250_clocks[] = {
index 68d8f7a..d82d725 100644
@@ -642,7 +642,7 @@ static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = {
                .name = "gcc_sdcc1_ice_core_clk_src",
                .parent_data = gcc_parent_data_0,
                .num_parents = 4,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
 
@@ -651,6 +651,7 @@ static const struct freq_tbl ftbl_gcc_sdcc2_apps_clk_src[] = {
        F(9600000, P_BI_TCXO, 2, 0, 0),
        F(19200000, P_BI_TCXO, 1, 0, 0),
        F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+       F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
        F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
        F(202000000, P_GPLL7_OUT_MAIN, 4, 0, 0),
        { }
@@ -666,7 +667,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = {
                .name = "gcc_sdcc2_apps_clk_src",
                .parent_data = gcc_parent_data_5,
                .num_parents = 5,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
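The clk_rcg2_ops -> clk_rcg2_floor_ops switch changes how the SDCC request clocks round: per the common qcom RCG2 code, the default ops pick the nearest table rate at or above the request, while the floor variant picks the nearest at or below, so the card is never clocked faster than requested. Illustratively, against ftbl_gcc_sdcc2_apps_clk_src above:

	clk_set_rate(card_clk, 30000000);
	/* clk_rcg2_ops:       selects 50000000 (smallest entry >= request) */
	/* clk_rcg2_floor_ops: selects 25000000 (largest entry  <= request) */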
 
diff --git a/drivers/clk/qcom/gcc-sdx55.c b/drivers/clk/qcom/gcc-sdx55.c
new file mode 100644
index 0000000..e3b9030
--- /dev/null
@@ -0,0 +1,1659 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2020, Linaro Ltd.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,gcc-sdx55.h>
+
+#include "common.h"
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-pll.h"
+#include "clk-rcg.h"
+#include "clk-regmap.h"
+#include "gdsc.h"
+#include "reset.h"
+
+enum {
+       P_BI_TCXO,
+       P_CORE_BI_PLL_TEST_SE,
+       P_GPLL0_OUT_EVEN,
+       P_GPLL0_OUT_MAIN,
+       P_GPLL4_OUT_EVEN,
+       P_GPLL5_OUT_MAIN,
+       P_SLEEP_CLK,
+};
+
+static const struct pll_vco lucid_vco[] = {
+       { 249600000, 2000000000, 0 },
+};
+
+static struct clk_alpha_pll gpll0 = {
+       .offset = 0x0,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID],
+       .vco_table = lucid_vco,
+       .num_vco = ARRAY_SIZE(lucid_vco),
+       .clkr = {
+               .enable_reg = 0x6d000,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gpll0",
+                       .parent_data = &(const struct clk_parent_data){
+                               .fw_name = "bi_tcxo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_fixed_lucid_ops,
+               },
+       },
+};
+
+static const struct clk_div_table post_div_table_lucid_even[] = {
+       { 0x0, 1 },
+       { 0x1, 2 },
+       { 0x3, 4 },
+       { 0x7, 8 },
+       { }
+};
+
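The divider table maps register field values to divisors and is sentinel-terminated. A lookup sketch of the encoding, illustrative only (the driver itself goes through the common postdiv helpers):

	static unsigned int lucid_even_regval(unsigned int div)
	{
		const struct clk_div_table *t;

		for (t = post_div_table_lucid_even; t->div; t++)
			if (t->div == div)
				return t->val;	/* e.g. div-by-4 -> 0x3 */
		return 0;			/* 0x0 == div-by-1 */
	}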
+static struct clk_alpha_pll_postdiv gpll0_out_even = {
+       .offset = 0x0,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID],
+       .post_div_shift = 8,
+       .post_div_table = post_div_table_lucid_even,
+       .num_post_div = ARRAY_SIZE(post_div_table_lucid_even),
+       .width = 4,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gpll0_out_even",
+               .parent_data = &(const struct clk_parent_data){
+                       .hw = &gpll0.clkr.hw,
+               },
+               .num_parents = 1,
+               .ops = &clk_alpha_pll_postdiv_lucid_ops,
+       },
+};
+
+static struct clk_alpha_pll gpll4 = {
+       .offset = 0x76000,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID],
+       .vco_table = lucid_vco,
+       .num_vco = ARRAY_SIZE(lucid_vco),
+       .clkr = {
+               .enable_reg = 0x6d000,
+               .enable_mask = BIT(4),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gpll4",
+                       .parent_data = &(const struct clk_parent_data){
+                               .fw_name = "bi_tcxo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_fixed_lucid_ops,
+               },
+       },
+};
+
+static struct clk_alpha_pll_postdiv gpll4_out_even = {
+       .offset = 0x76000,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID],
+       .post_div_shift = 8,
+       .post_div_table = post_div_table_lucid_even,
+       .num_post_div = ARRAY_SIZE(post_div_table_lucid_even),
+       .width = 4,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gpll4_out_even",
+               .parent_data = &(const struct clk_parent_data){
+                       .hw = &gpll4.clkr.hw,
+               },
+               .num_parents = 1,
+               .ops = &clk_alpha_pll_postdiv_lucid_ops,
+       },
+};
+
+static struct clk_alpha_pll gpll5 = {
+       .offset = 0x74000,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID],
+       .vco_table = lucid_vco,
+       .num_vco = ARRAY_SIZE(lucid_vco),
+       .clkr = {
+               .enable_reg = 0x6d000,
+               .enable_mask = BIT(5),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gpll5",
+                       .parent_data = &(const struct clk_parent_data){
+                               .fw_name = "bi_tcxo",
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_fixed_lucid_ops,
+               },
+       },
+};
+
+static const struct parent_map gcc_parent_map_0[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 1 },
+       { P_GPLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_0[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &gpll0.clkr.hw },
+       { .hw = &gpll0_out_even.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+static const struct clk_parent_data gcc_parents_0_ao[] = {
+       { .fw_name = "bi_tcxo_ao" },
+       { .hw = &gpll0.clkr.hw },
+       { .hw = &gpll0_out_even.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map gcc_parent_map_2[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 1 },
+       { P_GPLL4_OUT_EVEN, 2 },
+       { P_GPLL5_OUT_MAIN, 5 },
+       { P_GPLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_2[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &gpll0.clkr.hw },
+       { .hw = &gpll4_out_even.clkr.hw },
+       { .hw = &gpll5.clkr.hw },
+       { .hw = &gpll0_out_even.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map gcc_parent_map_3[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 1 },
+       { P_SLEEP_CLK, 5 },
+       { P_GPLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_3[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &gpll0.clkr.hw },
+       { .fw_name = "sleep_clk", .name = "sleep_clk" },
+       { .hw = &gpll0_out_even.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map gcc_parent_map_4[] = {
+       { P_BI_TCXO, 0 },
+       { P_SLEEP_CLK, 5 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_4[] = {
+       { .fw_name = "bi_tcxo" },
+       { .fw_name = "sleep_clk", .name = "sleep_clk" },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
+static const struct parent_map gcc_parent_map_5[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 1 },
+       { P_GPLL4_OUT_EVEN, 2 },
+       { P_GPLL0_OUT_EVEN, 6 },
+       { P_CORE_BI_PLL_TEST_SE, 7 },
+};
+
+static const struct clk_parent_data gcc_parents_5[] = {
+       { .fw_name = "bi_tcxo" },
+       { .hw = &gpll0.clkr.hw },
+       { .hw = &gpll4_out_even.clkr.hw },
+       { .hw = &gpll0_out_even.clkr.hw },
+       { .fw_name = "core_bi_pll_test_se" },
+};
+
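Each parent_map pairs a logical source with the hardware src-sel field value, and the matching clk_parent_data array must list the parents in the same order. The common code (qcom_find_src_index() and friends) handles the translation; the idea, as an illustrative sketch:

	static u8 src_to_cfg(const struct parent_map *map, size_t num, u8 src)
	{
		size_t i;

		for (i = 0; i < num; i++)
			if (map[i].src == src)
				return map[i].cfg; /* e.g. P_GPLL4_OUT_EVEN -> 2 */
		return 0;
	}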
+static const struct freq_tbl ftbl_gcc_blsp1_qup1_i2c_apps_clk_src[] = {
+       F(9600000, P_BI_TCXO, 2, 0, 0),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(50000000, P_GPLL0_OUT_MAIN, 12, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_blsp1_qup1_i2c_apps_clk_src = {
+       .cmd_rcgr = 0x11024,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_qup1_i2c_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_qup1_i2c_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_blsp1_qup1_spi_apps_clk_src[] = {
+       F(960000, P_BI_TCXO, 10, 1, 2),
+       F(4800000, P_BI_TCXO, 4, 0, 0),
+       F(9600000, P_BI_TCXO, 2, 0, 0),
+       F(15000000, P_GPLL0_OUT_EVEN, 5, 1, 4),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(24000000, P_GPLL0_OUT_MAIN, 12.5, 1, 2),
+       F(25000000, P_GPLL0_OUT_MAIN, 12, 1, 2),
+       F(50000000, P_GPLL0_OUT_MAIN, 12, 0, 0),
+       { }
+};
+
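Each F(rate, src, pre_div, m, n) row records how the rate is synthesized: rate = parent / pre_div * m / n, with n == 0 meaning the M/N counter is bypassed; per the common qcom F() macro, half-integer pre-dividers such as 12.5 are encoded as 2*div-1 in hardware. The 19.2 MHz bi_tcxo rows above check out directly:

	/* F(9600000, P_BI_TCXO,  2, 0, 0): 19.2 MHz / 2        = 9.6 MHz  */
	/* F(960000,  P_BI_TCXO, 10, 1, 2): 19.2 MHz / 10 * 1/2 = 0.96 MHz */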
+static struct clk_rcg2 gcc_blsp1_qup1_spi_apps_clk_src = {
+       .cmd_rcgr = 0x1100c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_qup1_spi_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_qup1_spi_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_blsp1_qup2_i2c_apps_clk_src = {
+       .cmd_rcgr = 0x13024,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_qup1_i2c_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_qup2_i2c_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_blsp1_qup2_spi_apps_clk_src = {
+       .cmd_rcgr = 0x1300c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_qup1_spi_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_qup2_spi_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_blsp1_qup3_i2c_apps_clk_src = {
+       .cmd_rcgr = 0x15024,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_qup1_i2c_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_qup3_i2c_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_blsp1_qup3_spi_apps_clk_src = {
+       .cmd_rcgr = 0x1500c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_qup1_spi_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_qup3_spi_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_blsp1_qup4_i2c_apps_clk_src = {
+       .cmd_rcgr = 0x17024,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_qup1_i2c_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_qup4_i2c_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_blsp1_qup4_spi_apps_clk_src = {
+       .cmd_rcgr = 0x1700c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_qup1_spi_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_qup4_spi_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_blsp1_uart1_apps_clk_src[] = {
+       F(3686400, P_GPLL0_OUT_EVEN, 1, 192, 15625),
+       F(7372800, P_GPLL0_OUT_EVEN, 1, 384, 15625),
+       F(9600000, P_BI_TCXO, 2, 0, 0),
+       F(14745600, P_GPLL0_OUT_EVEN, 1, 768, 15625),
+       F(16000000, P_GPLL0_OUT_EVEN, 1, 4, 75),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(19354839, P_GPLL0_OUT_MAIN, 15.5, 1, 2),
+       F(20000000, P_GPLL0_OUT_MAIN, 15, 1, 2),
+       F(20689655, P_GPLL0_OUT_MAIN, 14.5, 1, 2),
+       F(21428571, P_GPLL0_OUT_MAIN, 14, 1, 2),
+       F(22222222, P_GPLL0_OUT_MAIN, 13.5, 1, 2),
+       F(23076923, P_GPLL0_OUT_MAIN, 13, 1, 2),
+       F(24000000, P_GPLL0_OUT_MAIN, 5, 1, 5),
+       F(25000000, P_GPLL0_OUT_MAIN, 12, 1, 2),
+       F(26086957, P_GPLL0_OUT_MAIN, 11.5, 1, 2),
+       F(27272727, P_GPLL0_OUT_MAIN, 11, 1, 2),
+       F(28571429, P_GPLL0_OUT_MAIN, 10.5, 1, 2),
+       F(32000000, P_GPLL0_OUT_MAIN, 1, 4, 75),
+       F(40000000, P_GPLL0_OUT_MAIN, 15, 0, 0),
+       F(46400000, P_GPLL0_OUT_MAIN, 1, 29, 375),
+       F(48000000, P_GPLL0_OUT_MAIN, 12.5, 0, 0),
+       F(51200000, P_GPLL0_OUT_MAIN, 1, 32, 375),
+       F(56000000, P_GPLL0_OUT_MAIN, 1, 7, 75),
+       F(58982400, P_GPLL0_OUT_MAIN, 1, 1536, 15625),
+       F(60000000, P_GPLL0_OUT_MAIN, 10, 0, 0),
+       F(63157895, P_GPLL0_OUT_MAIN, 9.5, 0, 0),
+       { }
+};
+
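Assuming the customary 600 MHz GPLL0 (so GPLL0_OUT_EVEN runs at 300 MHz), the standard UART baud multiples in this table check out:

	/* F(3686400,  P_GPLL0_OUT_EVEN, 1, 192,  15625): 300 MHz * 192/15625  = 3.6864 MHz  */
	/* F(58982400, P_GPLL0_OUT_MAIN, 1, 1536, 15625): 600 MHz * 1536/15625 = 58.9824 MHz */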
+static struct clk_rcg2 gcc_blsp1_uart1_apps_clk_src = {
+       .cmd_rcgr = 0x1200c,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_uart1_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_uart1_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_blsp1_uart2_apps_clk_src = {
+       .cmd_rcgr = 0x1400c,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_uart1_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_uart2_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_blsp1_uart3_apps_clk_src = {
+       .cmd_rcgr = 0x1600c,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_uart1_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_uart3_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_blsp1_uart4_apps_clk_src = {
+       .cmd_rcgr = 0x1800c,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_blsp1_uart1_apps_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_blsp1_uart4_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_cpuss_ahb_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+       F(100000000, P_GPLL0_OUT_MAIN, 6, 0, 0),
+       F(133333333, P_GPLL0_OUT_MAIN, 4.5, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_cpuss_ahb_clk_src = {
+       .cmd_rcgr = 0x24010,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_cpuss_ahb_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_cpuss_ahb_clk_src",
+               .parent_data = gcc_parents_0_ao,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_cpuss_rbcpr_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_cpuss_rbcpr_clk_src = {
+       .cmd_rcgr = 0x2402c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_cpuss_rbcpr_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_cpuss_rbcpr_clk_src",
+               .parent_data = gcc_parents_0_ao,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_emac_clk_src[] = {
+       F(2500000, P_BI_TCXO, 1, 25, 192),
+       F(5000000, P_BI_TCXO, 1, 25, 96),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+       F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+       F(250000000, P_GPLL4_OUT_EVEN, 2, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_emac_clk_src = {
+       .cmd_rcgr = 0x47020,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_5,
+       .freq_tbl = ftbl_gcc_emac_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_emac_clk_src",
+               .parent_data = gcc_parents_5,
+               .num_parents = 5,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_emac_ptp_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+       F(230400000, P_GPLL5_OUT_MAIN, 3.5, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_emac_ptp_clk_src = {
+       .cmd_rcgr = 0x47038,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_2,
+       .freq_tbl = ftbl_gcc_emac_ptp_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_emac_ptp_clk_src",
+               .parent_data = gcc_parents_2,
+               .num_parents = 6,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_gp1_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(25000000, P_GPLL0_OUT_EVEN, 12, 0, 0),
+       F(50000000, P_GPLL0_OUT_EVEN, 6, 0, 0),
+       F(100000000, P_GPLL0_OUT_MAIN, 6, 0, 0),
+       F(200000000, P_GPLL0_OUT_MAIN, 3, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_gp1_clk_src = {
+       .cmd_rcgr = 0x2b004,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_3,
+       .freq_tbl = ftbl_gcc_gp1_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_gp1_clk_src",
+               .parent_data = gcc_parents_3,
+               .num_parents = 5,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_gp2_clk_src = {
+       .cmd_rcgr = 0x2c004,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_3,
+       .freq_tbl = ftbl_gcc_gp1_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_gp2_clk_src",
+               .parent_data = gcc_parents_3,
+               .num_parents = 5,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_gp3_clk_src = {
+       .cmd_rcgr = 0x2d004,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_3,
+       .freq_tbl = ftbl_gcc_gp1_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_gp3_clk_src",
+               .parent_data = gcc_parents_3,
+               .num_parents = 5,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_pcie_aux_phy_clk_src = {
+       .cmd_rcgr = 0x37034,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_4,
+       .freq_tbl = ftbl_gcc_cpuss_rbcpr_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_pcie_aux_phy_clk_src",
+               .parent_data = gcc_parents_4,
+               .num_parents = 3,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_pcie_rchng_phy_clk_src[] = {
+       F(100000000, P_GPLL0_OUT_EVEN, 3, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_pcie_rchng_phy_clk_src = {
+       .cmd_rcgr = 0x37050,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_3,
+       .freq_tbl = ftbl_gcc_pcie_rchng_phy_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_pcie_rchng_phy_clk_src",
+               .parent_data = gcc_parents_3,
+               .num_parents = 5,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_pdm2_clk_src[] = {
+       F(9600000, P_BI_TCXO, 2, 0, 0),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(60000000, P_GPLL0_OUT_MAIN, 10, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_pdm2_clk_src = {
+       .cmd_rcgr = 0x19010,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_pdm2_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_pdm2_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gcc_sdcc1_apps_clk_src = {
+       .cmd_rcgr = 0xf00c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_gp1_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_sdcc1_apps_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_usb30_master_clk_src[] = {
+       F(200000000, P_GPLL0_OUT_EVEN, 1.5, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_usb30_master_clk_src = {
+       .cmd_rcgr = 0xb024,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_usb30_master_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_usb30_master_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_usb30_mock_utmi_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_usb30_mock_utmi_clk_src = {
+       .cmd_rcgr = 0xb03c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_0,
+       .freq_tbl = ftbl_gcc_usb30_mock_utmi_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_usb30_mock_utmi_clk_src",
+               .parent_data = gcc_parents_0,
+               .num_parents = 4,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gcc_usb3_phy_aux_clk_src[] = {
+       F(1000000, P_BI_TCXO, 1, 5, 96),
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gcc_usb3_phy_aux_clk_src = {
+       .cmd_rcgr = 0xb064,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = gcc_parent_map_4,
+       .freq_tbl = ftbl_gcc_usb3_phy_aux_clk_src,
+       .clkr.hw.init = &(struct clk_init_data){
+               .name = "gcc_usb3_phy_aux_clk_src",
+               .parent_data = gcc_parents_4,
+               .num_parents = 3,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_branch gcc_ahb_pcie_link_clk = {
+       .halt_reg = 0x22004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x22004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ahb_pcie_link_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_ahb_clk = {
+       .halt_reg = 0x10004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x6d008,
+               .enable_mask = BIT(14),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
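The halt_check modes used by the branch clocks in this file, summarized from the common qcom branch code (our gloss, not part of the patch):

	/*
	 * BRANCH_HALT:       poll the CLK_OFF bit in halt_reg on both
	 *                    enable and disable.
	 * BRANCH_HALT_VOTED: the branch is enabled through a shared vote
	 *                    register (0x6d008/0x6d010 here); the halt bit
	 *                    is not polled on disable, since another voter
	 *                    may legitimately keep the clock running.
	 * BRANCH_HALT_DELAY: no status bit to poll; just delay briefly
	 *                    after toggling.
	 */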
+static struct clk_branch gcc_blsp1_qup1_i2c_apps_clk = {
+       .halt_reg = 0x11008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x11008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_qup1_i2c_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_qup1_i2c_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_qup1_spi_apps_clk = {
+       .halt_reg = 0x11004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x11004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_qup1_spi_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_qup1_spi_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_qup2_i2c_apps_clk = {
+       .halt_reg = 0x13008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_qup2_i2c_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_qup2_i2c_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_qup2_spi_apps_clk = {
+       .halt_reg = 0x13004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_qup2_spi_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_qup2_spi_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_qup3_i2c_apps_clk = {
+       .halt_reg = 0x15008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_qup3_i2c_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_qup3_i2c_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_qup3_spi_apps_clk = {
+       .halt_reg = 0x15004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_qup3_spi_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_qup3_spi_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_qup4_i2c_apps_clk = {
+       .halt_reg = 0x17008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x17008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_qup4_i2c_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_qup4_i2c_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_qup4_spi_apps_clk = {
+       .halt_reg = 0x17004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x17004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_qup4_spi_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_qup4_spi_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_uart1_apps_clk = {
+       .halt_reg = 0x12004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x12004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_uart1_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_uart1_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_uart2_apps_clk = {
+       .halt_reg = 0x14004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x14004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_uart2_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_uart2_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_uart3_apps_clk = {
+       .halt_reg = 0x16004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x16004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_uart3_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_uart3_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_blsp1_uart4_apps_clk = {
+       .halt_reg = 0x18004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x18004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_blsp1_uart4_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_blsp1_uart4_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_boot_rom_ahb_clk = {
+       .halt_reg = 0x1c004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x1c004,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x6d008,
+               .enable_mask = BIT(10),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_boot_rom_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ce1_ahb_clk = {
+       .halt_reg = 0x2100c,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x2100c,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x6d008,
+               .enable_mask = BIT(3),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ce1_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ce1_axi_clk = {
+       .halt_reg = 0x21008,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x6d008,
+               .enable_mask = BIT(4),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ce1_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_ce1_clk = {
+       .halt_reg = 0x21004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x6d008,
+               .enable_mask = BIT(5),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_ce1_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_cpuss_rbcpr_clk = {
+       .halt_reg = 0x24008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x24008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_cpuss_rbcpr_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_cpuss_rbcpr_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_eth_axi_clk = {
+       .halt_reg = 0x4701c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x4701c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_eth_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_eth_ptp_clk = {
+       .halt_reg = 0x47018,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x47018,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_eth_ptp_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_emac_ptp_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_eth_rgmii_clk = {
+       .halt_reg = 0x47010,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x47010,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_eth_rgmii_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_emac_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_eth_slave_ahb_clk = {
+       .halt_reg = 0x47014,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x47014,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_eth_slave_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_gp1_clk = {
+       .halt_reg = 0x2b000,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x2b000,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_gp1_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_gp1_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_gp2_clk = {
+       .halt_reg = 0x2c000,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x2c000,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_gp2_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_gp2_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_gp3_clk = {
+       .halt_reg = 0x2d000,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x2d000,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_gp3_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_gp3_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_0_clkref_clk = {
+       .halt_reg = 0x88004,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x88004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_0_clkref_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_aux_clk = {
+       .halt_reg = 0x37024,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x6d010,
+               .enable_mask = BIT(3),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_aux_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_cfg_ahb_clk = {
+       .halt_reg = 0x3701c,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x6d010,
+               .enable_mask = BIT(2),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_cfg_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_mstr_axi_clk = {
+       .halt_reg = 0x37018,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x6d010,
+               .enable_mask = BIT(1),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_mstr_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_pipe_clk = {
+       .halt_reg = 0x3702c,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x6d010,
+               .enable_mask = BIT(4),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_pipe_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_rchng_phy_clk = {
+       .halt_reg = 0x37020,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x6d010,
+               .enable_mask = BIT(7),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_rchng_phy_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_pcie_rchng_phy_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_sleep_clk = {
+       .halt_reg = 0x37028,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x6d010,
+               .enable_mask = BIT(6),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_sleep_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_pcie_aux_phy_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_slv_axi_clk = {
+       .halt_reg = 0x37014,
+       .halt_check = BRANCH_HALT_VOTED,
+       .hwcg_reg = 0x37014,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x6d010,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_slv_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pcie_slv_q2a_axi_clk = {
+       .halt_reg = 0x37010,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x6d010,
+               .enable_mask = BIT(5),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pcie_slv_q2a_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pdm2_clk = {
+       .halt_reg = 0x1900c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1900c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pdm2_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_pdm2_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pdm_ahb_clk = {
+       .halt_reg = 0x19004,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0x19004,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0x19004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pdm_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_pdm_xo4_clk = {
+       .halt_reg = 0x19008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x19008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_pdm_xo4_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_sdcc1_ahb_clk = {
+       .halt_reg = 0xf008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xf008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_sdcc1_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_sdcc1_apps_clk = {
+       .halt_reg = 0xf004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xf004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_sdcc1_apps_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_sdcc1_apps_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_master_clk = {
+       .halt_reg = 0xb010,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb010,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_master_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_usb30_master_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_mock_utmi_clk = {
+       .halt_reg = 0xb020,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb020,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_mock_utmi_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_usb30_mock_utmi_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_mstr_axi_clk = {
+       .halt_reg = 0xb014,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb014,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_mstr_axi_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_sleep_clk = {
+       .halt_reg = 0xb01c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb01c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_sleep_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb30_slv_ahb_clk = {
+       .halt_reg = 0xb018,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb018,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb30_slv_ahb_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb3_phy_aux_clk = {
+       .halt_reg = 0xb058,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xb058,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb3_phy_aux_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &gcc_usb3_phy_aux_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb3_phy_pipe_clk = {
+       .halt_reg = 0xb05c,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0xb05c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb3_phy_pipe_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb3_prim_clkref_clk = {
+       .halt_reg = 0x88000,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x88000,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb3_prim_clkref_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_usb_phy_cfg_ahb2phy_clk = {
+       .halt_reg = 0xe004,
+       .halt_check = BRANCH_HALT,
+       .hwcg_reg = 0xe004,
+       .hwcg_bit = 1,
+       .clkr = {
+               .enable_reg = 0xe004,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_usb_phy_cfg_ahb2phy_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gcc_xo_pcie_link_clk = {
+       .halt_reg = 0x22008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x22008,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_xo_pcie_link_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
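+/*
+ * GDSCs (globally distributed switch controllers) implement the power
+ * domains for the USB3, PCIe and EMAC subsystems.
+ */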
+static struct gdsc usb30_gdsc = {
+       .gdscr = 0x0b004,
+       .pd = {
+               .name = "usb30_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc pcie_gdsc = {
+       .gdscr = 0x37004,
+       .pd = {
+               .name = "pcie_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc emac_gdsc = {
+       .gdscr = 0x47004,
+       .pd = {
+               .name = "emac_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct clk_regmap *gcc_sdx55_clocks[] = {
+       [GCC_AHB_PCIE_LINK_CLK] = &gcc_ahb_pcie_link_clk.clkr,
+       [GCC_BLSP1_AHB_CLK] = &gcc_blsp1_ahb_clk.clkr,
+       [GCC_BLSP1_QUP1_I2C_APPS_CLK] = &gcc_blsp1_qup1_i2c_apps_clk.clkr,
+       [GCC_BLSP1_QUP1_I2C_APPS_CLK_SRC] =
+               &gcc_blsp1_qup1_i2c_apps_clk_src.clkr,
+       [GCC_BLSP1_QUP1_SPI_APPS_CLK] = &gcc_blsp1_qup1_spi_apps_clk.clkr,
+       [GCC_BLSP1_QUP1_SPI_APPS_CLK_SRC] =
+               &gcc_blsp1_qup1_spi_apps_clk_src.clkr,
+       [GCC_BLSP1_QUP2_I2C_APPS_CLK] = &gcc_blsp1_qup2_i2c_apps_clk.clkr,
+       [GCC_BLSP1_QUP2_I2C_APPS_CLK_SRC] =
+               &gcc_blsp1_qup2_i2c_apps_clk_src.clkr,
+       [GCC_BLSP1_QUP2_SPI_APPS_CLK] = &gcc_blsp1_qup2_spi_apps_clk.clkr,
+       [GCC_BLSP1_QUP2_SPI_APPS_CLK_SRC] =
+               &gcc_blsp1_qup2_spi_apps_clk_src.clkr,
+       [GCC_BLSP1_QUP3_I2C_APPS_CLK] = &gcc_blsp1_qup3_i2c_apps_clk.clkr,
+       [GCC_BLSP1_QUP3_I2C_APPS_CLK_SRC] =
+               &gcc_blsp1_qup3_i2c_apps_clk_src.clkr,
+       [GCC_BLSP1_QUP3_SPI_APPS_CLK] = &gcc_blsp1_qup3_spi_apps_clk.clkr,
+       [GCC_BLSP1_QUP3_SPI_APPS_CLK_SRC] =
+               &gcc_blsp1_qup3_spi_apps_clk_src.clkr,
+       [GCC_BLSP1_QUP4_I2C_APPS_CLK] = &gcc_blsp1_qup4_i2c_apps_clk.clkr,
+       [GCC_BLSP1_QUP4_I2C_APPS_CLK_SRC] =
+               &gcc_blsp1_qup4_i2c_apps_clk_src.clkr,
+       [GCC_BLSP1_QUP4_SPI_APPS_CLK] = &gcc_blsp1_qup4_spi_apps_clk.clkr,
+       [GCC_BLSP1_QUP4_SPI_APPS_CLK_SRC] =
+               &gcc_blsp1_qup4_spi_apps_clk_src.clkr,
+       [GCC_BLSP1_UART1_APPS_CLK] = &gcc_blsp1_uart1_apps_clk.clkr,
+       [GCC_BLSP1_UART1_APPS_CLK_SRC] = &gcc_blsp1_uart1_apps_clk_src.clkr,
+       [GCC_BLSP1_UART2_APPS_CLK] = &gcc_blsp1_uart2_apps_clk.clkr,
+       [GCC_BLSP1_UART2_APPS_CLK_SRC] = &gcc_blsp1_uart2_apps_clk_src.clkr,
+       [GCC_BLSP1_UART3_APPS_CLK] = &gcc_blsp1_uart3_apps_clk.clkr,
+       [GCC_BLSP1_UART3_APPS_CLK_SRC] = &gcc_blsp1_uart3_apps_clk_src.clkr,
+       [GCC_BLSP1_UART4_APPS_CLK] = &gcc_blsp1_uart4_apps_clk.clkr,
+       [GCC_BLSP1_UART4_APPS_CLK_SRC] = &gcc_blsp1_uart4_apps_clk_src.clkr,
+       [GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr,
+       [GCC_CE1_AHB_CLK] = &gcc_ce1_ahb_clk.clkr,
+       [GCC_CE1_AXI_CLK] = &gcc_ce1_axi_clk.clkr,
+       [GCC_CE1_CLK] = &gcc_ce1_clk.clkr,
+       [GCC_CPUSS_AHB_CLK_SRC] = &gcc_cpuss_ahb_clk_src.clkr,
+       [GCC_CPUSS_RBCPR_CLK] = &gcc_cpuss_rbcpr_clk.clkr,
+       [GCC_CPUSS_RBCPR_CLK_SRC] = &gcc_cpuss_rbcpr_clk_src.clkr,
+       [GCC_EMAC_CLK_SRC] = &gcc_emac_clk_src.clkr,
+       [GCC_EMAC_PTP_CLK_SRC] = &gcc_emac_ptp_clk_src.clkr,
+       [GCC_ETH_AXI_CLK] = &gcc_eth_axi_clk.clkr,
+       [GCC_ETH_PTP_CLK] = &gcc_eth_ptp_clk.clkr,
+       [GCC_ETH_RGMII_CLK] = &gcc_eth_rgmii_clk.clkr,
+       [GCC_ETH_SLAVE_AHB_CLK] = &gcc_eth_slave_ahb_clk.clkr,
+       [GCC_GP1_CLK] = &gcc_gp1_clk.clkr,
+       [GCC_GP1_CLK_SRC] = &gcc_gp1_clk_src.clkr,
+       [GCC_GP2_CLK] = &gcc_gp2_clk.clkr,
+       [GCC_GP2_CLK_SRC] = &gcc_gp2_clk_src.clkr,
+       [GCC_GP3_CLK] = &gcc_gp3_clk.clkr,
+       [GCC_GP3_CLK_SRC] = &gcc_gp3_clk_src.clkr,
+       [GCC_PCIE_0_CLKREF_CLK] = &gcc_pcie_0_clkref_clk.clkr,
+       [GCC_PCIE_AUX_CLK] = &gcc_pcie_aux_clk.clkr,
+       [GCC_PCIE_AUX_PHY_CLK_SRC] = &gcc_pcie_aux_phy_clk_src.clkr,
+       [GCC_PCIE_CFG_AHB_CLK] = &gcc_pcie_cfg_ahb_clk.clkr,
+       [GCC_PCIE_MSTR_AXI_CLK] = &gcc_pcie_mstr_axi_clk.clkr,
+       [GCC_PCIE_PIPE_CLK] = &gcc_pcie_pipe_clk.clkr,
+       [GCC_PCIE_RCHNG_PHY_CLK] = &gcc_pcie_rchng_phy_clk.clkr,
+       [GCC_PCIE_RCHNG_PHY_CLK_SRC] = &gcc_pcie_rchng_phy_clk_src.clkr,
+       [GCC_PCIE_SLEEP_CLK] = &gcc_pcie_sleep_clk.clkr,
+       [GCC_PCIE_SLV_AXI_CLK] = &gcc_pcie_slv_axi_clk.clkr,
+       [GCC_PCIE_SLV_Q2A_AXI_CLK] = &gcc_pcie_slv_q2a_axi_clk.clkr,
+       [GCC_PDM2_CLK] = &gcc_pdm2_clk.clkr,
+       [GCC_PDM2_CLK_SRC] = &gcc_pdm2_clk_src.clkr,
+       [GCC_PDM_AHB_CLK] = &gcc_pdm_ahb_clk.clkr,
+       [GCC_PDM_XO4_CLK] = &gcc_pdm_xo4_clk.clkr,
+       [GCC_SDCC1_AHB_CLK] = &gcc_sdcc1_ahb_clk.clkr,
+       [GCC_SDCC1_APPS_CLK] = &gcc_sdcc1_apps_clk.clkr,
+       [GCC_SDCC1_APPS_CLK_SRC] = &gcc_sdcc1_apps_clk_src.clkr,
+       [GCC_USB30_MASTER_CLK] = &gcc_usb30_master_clk.clkr,
+       [GCC_USB30_MASTER_CLK_SRC] = &gcc_usb30_master_clk_src.clkr,
+       [GCC_USB30_MOCK_UTMI_CLK] = &gcc_usb30_mock_utmi_clk.clkr,
+       [GCC_USB30_MOCK_UTMI_CLK_SRC] = &gcc_usb30_mock_utmi_clk_src.clkr,
+       [GCC_USB30_MSTR_AXI_CLK] = &gcc_usb30_mstr_axi_clk.clkr,
+       [GCC_USB30_SLEEP_CLK] = &gcc_usb30_sleep_clk.clkr,
+       [GCC_USB30_SLV_AHB_CLK] = &gcc_usb30_slv_ahb_clk.clkr,
+       [GCC_USB3_PHY_AUX_CLK] = &gcc_usb3_phy_aux_clk.clkr,
+       [GCC_USB3_PHY_AUX_CLK_SRC] = &gcc_usb3_phy_aux_clk_src.clkr,
+       [GCC_USB3_PHY_PIPE_CLK] = &gcc_usb3_phy_pipe_clk.clkr,
+       [GCC_USB3_PRIM_CLKREF_CLK] = &gcc_usb3_prim_clkref_clk.clkr,
+       [GCC_USB_PHY_CFG_AHB2PHY_CLK] = &gcc_usb_phy_cfg_ahb2phy_clk.clkr,
+       [GCC_XO_PCIE_LINK_CLK] = &gcc_xo_pcie_link_clk.clkr,
+       [GPLL0] = &gpll0.clkr,
+       [GPLL0_OUT_EVEN] = &gpll0_out_even.clkr,
+       [GPLL4] = &gpll4.clkr,
+       [GPLL4_OUT_EVEN] = &gpll4_out_even.clkr,
+       [GPLL5] = &gpll5.clkr,
+};
+
+static const struct qcom_reset_map gcc_sdx55_resets[] = {
+       [GCC_EMAC_BCR] = { 0x47000 },
+       [GCC_PCIE_BCR] = { 0x37000 },
+       [GCC_PCIE_LINK_DOWN_BCR] = { 0x77000 },
+       [GCC_PCIE_PHY_BCR] = { 0x39000 },
+       [GCC_PCIE_PHY_COM_BCR] = { 0x78004 },
+       [GCC_QUSB2PHY_BCR] = { 0xd000 },
+       [GCC_USB30_BCR] = { 0xb000 },
+       [GCC_USB3_PHY_BCR] = { 0xc000 },
+       [GCC_USB3PHY_PHY_BCR] = { 0xc004 },
+       [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0xe000 },
+};
+
+static struct gdsc *gcc_sdx55_gdscs[] = {
+       [USB30_GDSC] = &usb30_gdsc,
+       [PCIE_GDSC] = &pcie_gdsc,
+       [EMAC_GDSC] = &emac_gdsc,
+};
+
+static const struct regmap_config gcc_sdx55_regmap_config = {
+       .reg_bits       = 32,
+       .reg_stride     = 4,
+       .val_bits       = 32,
+       .max_register   = 0x9b040,
+       .fast_io        = true,
+};
+
+static const struct qcom_cc_desc gcc_sdx55_desc = {
+       .config = &gcc_sdx55_regmap_config,
+       .clks = gcc_sdx55_clocks,
+       .num_clks = ARRAY_SIZE(gcc_sdx55_clocks),
+       .resets = gcc_sdx55_resets,
+       .num_resets = ARRAY_SIZE(gcc_sdx55_resets),
+       .gdscs = gcc_sdx55_gdscs,
+       .num_gdscs = ARRAY_SIZE(gcc_sdx55_gdscs),
+};
+
+static const struct of_device_id gcc_sdx55_match_table[] = {
+       { .compatible = "qcom,gcc-sdx55" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, gcc_sdx55_match_table);
+
+static int gcc_sdx55_probe(struct platform_device *pdev)
+{
+       struct regmap *regmap;
+
+       regmap = qcom_cc_map(pdev, &gcc_sdx55_desc);
+       if (IS_ERR(regmap))
+               return PTR_ERR(regmap);
+
+       /*
+        * Keep the clocks always-ON as they are critical to the functioning
+        * of the system:
+        * GCC_SYS_NOC_CPUSS_AHB_CLK, GCC_CPUSS_AHB_CLK, GCC_CPUSS_GNOC_CLK
+        */
+       regmap_update_bits(regmap, 0x6d008, BIT(0), BIT(0));
+       regmap_update_bits(regmap, 0x6d008, BIT(21), BIT(21));
+       regmap_update_bits(regmap, 0x6d008, BIT(22), BIT(22));
+
+       return qcom_cc_really_probe(pdev, &gcc_sdx55_desc, regmap);
+}
+
+static struct platform_driver gcc_sdx55_driver = {
+       .probe = gcc_sdx55_probe,
+       .driver = {
+               .name = "gcc-sdx55",
+               .of_match_table = gcc_sdx55_match_table,
+       },
+};
+
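+/*
+ * Registered at subsys_initcall time so the clock controller comes up
+ * before the drivers that consume its clocks.
+ */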
+static int __init gcc_sdx55_init(void)
+{
+       return platform_driver_register(&gcc_sdx55_driver);
+}
+subsys_initcall(gcc_sdx55_init);
+
+static void __exit gcc_sdx55_exit(void)
+{
+       platform_driver_unregister(&gcc_sdx55_driver);
+}
+module_exit(gcc_sdx55_exit);
+
+MODULE_DESCRIPTION("QTI GCC SDX55 Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/qcom/lpass-gfm-sm8250.c b/drivers/clk/qcom/lpass-gfm-sm8250.c
new file mode 100644
index 0000000..d366c7c
--- /dev/null
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * LPASS Audio CC and Always ON CC Glitch Free Mux clock driver
+ *
+ * Copyright (c) 2020 Linaro Ltd.
+ * Author: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/clk-provider.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/pm_clock.h>
+#include <linux/pm_runtime.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <dt-bindings/clock/qcom,sm8250-lpass-audiocc.h>
+#include <dt-bindings/clock/qcom,sm8250-lpass-aoncc.h>
+
+struct lpass_gfm {
+       struct device *dev;
+       void __iomem *base;
+};
+
+struct clk_gfm {
+       unsigned int mux_reg;
+       unsigned int mux_mask;
+       struct clk_hw   hw;
+       struct lpass_gfm *priv;
+       void __iomem *gfm_mux;
+};
+
+#define GFM_MASK       BIT(1)
+#define to_clk_gfm(_hw) container_of(_hw, struct clk_gfm, hw)
+
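+/*
+ * Each glitch-free mux selects between exactly two parents via a single
+ * register bit (GFM_MASK), so get_parent/set_parent reduce to one
+ * read-modify-write of the mux register.
+ */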
+static u8 clk_gfm_get_parent(struct clk_hw *hw)
+{
+       struct clk_gfm *clk = to_clk_gfm(hw);
+
+       return readl(clk->gfm_mux) & GFM_MASK;
+}
+
+static int clk_gfm_set_parent(struct clk_hw *hw, u8 index)
+{
+       struct clk_gfm *clk = to_clk_gfm(hw);
+       unsigned int val;
+
+       val = readl(clk->gfm_mux);
+
+       if (index)
+               val |= GFM_MASK;
+       else
+               val &= ~GFM_MASK;
+
+       writel(val, clk->gfm_mux);
+
+       return 0;
+}
+
+static const struct clk_ops clk_gfm_ops = {
+       .get_parent = clk_gfm_get_parent,
+       .set_parent = clk_gfm_set_parent,
+       .determine_rate = __clk_mux_determine_rate,
+};
+
+static struct clk_gfm lpass_gfm_va_mclk = {
+       .mux_reg = 0x20000,
+       .mux_mask = BIT(0),
+       .hw.init = &(struct clk_init_data) {
+               .name = "VA_MCLK",
+               .ops = &clk_gfm_ops,
+               .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+               .num_parents = 2,
+               .parent_data = (const struct clk_parent_data[]){
+                       {
+                               .index = 0,
+                               .fw_name = "LPASS_CLK_ID_TX_CORE_MCLK",
+                       }, {
+                               .index = 1,
+                               .fw_name = "LPASS_CLK_ID_VA_CORE_MCLK",
+                       },
+               },
+       },
+};
+
+static struct clk_gfm lpass_gfm_tx_npl = {
+       .mux_reg = 0x20000,
+       .mux_mask = BIT(0),
+       .hw.init = &(struct clk_init_data) {
+               .name = "TX_NPL",
+               .ops = &clk_gfm_ops,
+               .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+               .parent_data = (const struct clk_parent_data[]){
+                       {
+                               .index = 0,
+                               .fw_name = "LPASS_CLK_ID_TX_CORE_NPL_MCLK",
+                       }, {
+                               .index = 1,
+                               .fw_name = "LPASS_CLK_ID_VA_CORE_2X_MCLK",
+                       },
+               },
+               .num_parents = 2,
+       },
+};
+
+static struct clk_gfm lpass_gfm_wsa_mclk = {
+       .mux_reg = 0x220d8,
+       .mux_mask = BIT(0),
+       .hw.init = &(struct clk_init_data) {
+               .name = "WSA_MCLK",
+               .ops = &clk_gfm_ops,
+               .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+               .parent_data = (const struct clk_parent_data[]){
+                       {
+                               .index = 0,
+                               .fw_name = "LPASS_CLK_ID_TX_CORE_MCLK",
+                       }, {
+                               .index = 1,
+                               .fw_name = "LPASS_CLK_ID_WSA_CORE_MCLK",
+                       },
+               },
+               .num_parents = 2,
+       },
+};
+
+static struct clk_gfm lpass_gfm_wsa_npl = {
+       .mux_reg = 0x220d8,
+       .mux_mask = BIT(0),
+       .hw.init = &(struct clk_init_data) {
+               .name = "WSA_NPL",
+               .ops = &clk_gfm_ops,
+               .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+               .parent_data = (const struct clk_parent_data[]){
+                       {
+                               .index = 0,
+                               .fw_name = "LPASS_CLK_ID_TX_CORE_NPL_MCLK",
+                       }, {
+                               .index = 1,
+                               .fw_name = "LPASS_CLK_ID_WSA_CORE_NPL_MCLK",
+                       },
+               },
+               .num_parents = 2,
+       },
+};
+
+static struct clk_gfm lpass_gfm_rx_mclk_mclk2 = {
+       .mux_reg = 0x240d8,
+       .mux_mask = BIT(0),
+       .hw.init = &(struct clk_init_data) {
+               .name = "RX_MCLK_MCLK2",
+               .ops = &clk_gfm_ops,
+               .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+               .parent_data = (const struct clk_parent_data[]){
+                       {
+                               .index = 0,
+                               .fw_name = "LPASS_CLK_ID_TX_CORE_MCLK",
+                       }, {
+                               .index = 1,
+                               .fw_name = "LPASS_CLK_ID_RX_CORE_MCLK",
+                       },
+               },
+               .num_parents = 2,
+       },
+};
+
+static struct clk_gfm lpass_gfm_rx_npl = {
+       .mux_reg = 0x240d8,
+       .mux_mask = BIT(0),
+       .hw.init = &(struct clk_init_data) {
+               .name = "RX_NPL",
+               .ops = &clk_gfm_ops,
+               .flags = CLK_SET_RATE_PARENT | CLK_OPS_PARENT_ENABLE,
+               .parent_data = (const struct clk_parent_data[]){
+                       {
+                               .index = 0,
+                               .fw_name = "LPASS_CLK_ID_TX_CORE_NPL_MCLK",
+                       }, {
+                               .index = 1,
+                               .fw_name = "LPASS_CLK_ID_RX_CORE_NPL_MCLK",
+                       },
+               },
+               .num_parents = 2,
+       },
+};
+
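+/* these tables are indexed by the LPASS_CDC_* IDs from the dt-bindings headers */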
+static struct clk_gfm *aoncc_gfm_clks[] = {
+       [LPASS_CDC_VA_MCLK]             = &lpass_gfm_va_mclk,
+       [LPASS_CDC_TX_NPL]              = &lpass_gfm_tx_npl,
+};
+
+static struct clk_hw_onecell_data aoncc_hw_onecell_data = {
+       .hws = {
+               [LPASS_CDC_VA_MCLK]     = &lpass_gfm_va_mclk.hw,
+               [LPASS_CDC_TX_NPL]      = &lpass_gfm_tx_npl.hw,
+       },
+       .num = ARRAY_SIZE(aoncc_gfm_clks),
+};
+
+static struct clk_gfm *audiocc_gfm_clks[] = {
+       [LPASS_CDC_WSA_NPL]             = &lpass_gfm_wsa_npl,
+       [LPASS_CDC_WSA_MCLK]            = &lpass_gfm_wsa_mclk,
+       [LPASS_CDC_RX_NPL]              = &lpass_gfm_rx_npl,
+       [LPASS_CDC_RX_MCLK_MCLK2]       = &lpass_gfm_rx_mclk_mclk2,
+};
+
+static struct clk_hw_onecell_data audiocc_hw_onecell_data = {
+       .hws = {
+               [LPASS_CDC_WSA_NPL]     = &lpass_gfm_wsa_npl.hw,
+               [LPASS_CDC_WSA_MCLK]    = &lpass_gfm_wsa_mclk.hw,
+               [LPASS_CDC_RX_NPL]      = &lpass_gfm_rx_npl.hw,
+               [LPASS_CDC_RX_MCLK_MCLK2] = &lpass_gfm_rx_mclk_mclk2.hw,
+       },
+       .num = ARRAY_SIZE(audiocc_gfm_clks),
+};
+
+struct lpass_gfm_data {
+       struct clk_hw_onecell_data *onecell_data;
+       struct clk_gfm **gfm_clks;
+};
+
+static struct lpass_gfm_data audiocc_data = {
+       .onecell_data = &audiocc_hw_onecell_data,
+       .gfm_clks = audiocc_gfm_clks,
+};
+
+static struct lpass_gfm_data aoncc_data = {
+       .onecell_data = &aoncc_hw_onecell_data,
+       .gfm_clks = aoncc_gfm_clks,
+};
+
+static int lpass_gfm_clk_driver_probe(struct platform_device *pdev)
+{
+       const struct lpass_gfm_data *data;
+       struct device *dev = &pdev->dev;
+       struct clk_gfm *gfm;
+       struct lpass_gfm *cc;
+       int err, i;
+
+       data = of_device_get_match_data(dev);
+       if (!data)
+               return -EINVAL;
+
+       cc = devm_kzalloc(dev, sizeof(*cc), GFP_KERNEL);
+       if (!cc)
+               return -ENOMEM;
+
+       cc->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(cc->base))
+               return PTR_ERR(cc->base);
+
+       pm_runtime_enable(dev);
+       err = pm_clk_create(dev);
+       if (err)
+               goto pm_clk_err;
+
+       err = of_pm_clk_add_clks(dev);
+       if (err < 0) {
+               dev_dbg(dev, "Failed to get lpass core voting clocks\n");
+               goto clk_reg_err;
+       }
+
+       for (i = 0; i < data->onecell_data->num; i++) {
+               if (!data->gfm_clks[i])
+                       continue;
+
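+               /* resolve each mux register: controller base plus per-clock offset */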
+               gfm = data->gfm_clks[i];
+               gfm->priv = cc;
+               gfm->gfm_mux = cc->base + gfm->mux_reg;
+
+               err = devm_clk_hw_register(dev, &data->gfm_clks[i]->hw);
+               if (err)
+                       goto clk_reg_err;
+       }
+
+       err = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
+                                         data->onecell_data);
+       if (err)
+               goto clk_reg_err;
+
+       return 0;
+
+clk_reg_err:
+       pm_clk_destroy(dev);
+pm_clk_err:
+       pm_runtime_disable(dev);
+       return err;
+}
+
+static const struct of_device_id lpass_gfm_clk_match_table[] = {
+       {
+               .compatible = "qcom,sm8250-lpass-aoncc",
+               .data = &aoncc_data,
+       },
+       {
+               .compatible = "qcom,sm8250-lpass-audiocc",
+               .data = &audiocc_data,
+       },
+       { }
+};
+MODULE_DEVICE_TABLE(of, lpass_gfm_clk_match_table);
+
+static const struct dev_pm_ops lpass_gfm_pm_ops = {
+       SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL)
+};
+
+static struct platform_driver lpass_gfm_clk_driver = {
+       .probe          = lpass_gfm_clk_driver_probe,
+       .driver         = {
+               .name   = "lpass-gfm-clk",
+               .of_match_table = lpass_gfm_clk_match_table,
+               .pm = &lpass_gfm_pm_ops,
+       },
+};
+module_platform_driver(lpass_gfm_clk_driver);
+MODULE_LICENSE("GPL v2");
index 228d08f..2e0ecc3 100644
@@ -356,12 +356,52 @@ static const struct qcom_cc_desc lpass_audio_hm_sc7180_desc = {
        .num_gdscs = ARRAY_SIZE(lpass_audio_hm_sc7180_gdscs),
 };
 
+static void lpass_pm_runtime_disable(void *data)
+{
+       pm_runtime_disable(data);
+}
+
+static void lpass_pm_clk_destroy(void *data)
+{
+       pm_clk_destroy(data);
+}
+
+static int lpass_create_pm_clks(struct platform_device *pdev)
+{
+       int ret;
+
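+       /*
+        * Enable runtime PM with a 500 ms autosuspend delay so the "iface"
+        * clock added below stays on across bursts of register accesses.
+        */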
+       pm_runtime_use_autosuspend(&pdev->dev);
+       pm_runtime_set_autosuspend_delay(&pdev->dev, 500);
+       pm_runtime_enable(&pdev->dev);
+
+       ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_runtime_disable, &pdev->dev);
+       if (ret)
+               return ret;
+
+       ret = pm_clk_create(&pdev->dev);
+       if (ret)
+               return ret;
+       ret = devm_add_action_or_reset(&pdev->dev, lpass_pm_clk_destroy, &pdev->dev);
+       if (ret)
+               return ret;
+
+       ret = pm_clk_add(&pdev->dev, "iface");
+       if (ret < 0)
+               dev_err(&pdev->dev, "failed to acquire iface clock\n");
+
+       return ret;
+}
+
 static int lpass_core_cc_sc7180_probe(struct platform_device *pdev)
 {
        const struct qcom_cc_desc *desc;
        struct regmap *regmap;
        int ret;
 
+       ret = lpass_create_pm_clks(pdev);
+       if (ret)
+               return ret;
+
        lpass_core_cc_sc7180_regmap_config.name = "lpass_audio_cc";
        desc = &lpass_audio_hm_sc7180_desc;
        ret = qcom_cc_probe_by_index(pdev, 1, desc);
@@ -386,12 +426,22 @@ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev)
        clk_fabia_pll_configure(&lpass_lpaaudio_dig_pll, regmap,
                                &lpass_lpaaudio_dig_pll_config);
 
-       return qcom_cc_really_probe(pdev, &lpass_core_cc_sc7180_desc, regmap);
+       ret = qcom_cc_really_probe(pdev, &lpass_core_cc_sc7180_desc, regmap);
+
+       pm_runtime_mark_last_busy(&pdev->dev);
+       pm_runtime_put_autosuspend(&pdev->dev);
+
+       return ret;
 }
 
 static int lpass_hm_core_probe(struct platform_device *pdev)
 {
        const struct qcom_cc_desc *desc;
+       int ret;
+
+       ret = lpass_create_pm_clks(pdev);
+       if (ret)
+               return ret;
 
        lpass_core_cc_sc7180_regmap_config.name = "lpass_hm_core";
        desc = &lpass_core_hm_sc7180_desc;
@@ -399,61 +449,28 @@ static int lpass_hm_core_probe(struct platform_device *pdev)
        return qcom_cc_probe_by_index(pdev, 0, desc);
 }
 
-static const struct of_device_id lpass_core_cc_sc7180_match_table[] = {
+static const struct of_device_id lpass_hm_sc7180_match_table[] = {
        {
                .compatible = "qcom,sc7180-lpasshm",
-               .data = lpass_hm_core_probe,
        },
+       { }
+};
+MODULE_DEVICE_TABLE(of, lpass_hm_sc7180_match_table);
+
+static const struct of_device_id lpass_core_cc_sc7180_match_table[] = {
        {
                .compatible = "qcom,sc7180-lpasscorecc",
-               .data = lpass_core_cc_sc7180_probe,
        },
        { }
 };
 MODULE_DEVICE_TABLE(of, lpass_core_cc_sc7180_match_table);
 
-static int lpass_core_sc7180_probe(struct platform_device *pdev)
-{
-       int (*clk_probe)(struct platform_device *p);
-       int ret;
-
-       pm_runtime_enable(&pdev->dev);
-       ret = pm_clk_create(&pdev->dev);
-       if (ret)
-               goto disable_pm_runtime;
-
-       ret = pm_clk_add(&pdev->dev, "iface");
-       if (ret < 0) {
-               dev_err(&pdev->dev, "failed to acquire iface clock\n");
-               goto destroy_pm_clk;
-       }
-
-       ret = -EINVAL;
-       clk_probe = of_device_get_match_data(&pdev->dev);
-       if (!clk_probe)
-               goto destroy_pm_clk;
-
-       ret = clk_probe(pdev);
-       if (ret)
-               goto destroy_pm_clk;
-
-       return 0;
-
-destroy_pm_clk:
-       pm_clk_destroy(&pdev->dev);
-
-disable_pm_runtime:
-       pm_runtime_disable(&pdev->dev);
-
-       return ret;
-}
-
 static const struct dev_pm_ops lpass_core_cc_pm_ops = {
        SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL)
 };
 
 static struct platform_driver lpass_core_cc_sc7180_driver = {
-       .probe = lpass_core_sc7180_probe,
+       .probe = lpass_core_cc_sc7180_probe,
        .driver = {
                .name = "lpass_core_cc-sc7180",
                .of_match_table = lpass_core_cc_sc7180_match_table,
@@ -461,17 +478,43 @@ static struct platform_driver lpass_core_cc_sc7180_driver = {
        },
 };
 
-static int __init lpass_core_cc_sc7180_init(void)
+static const struct dev_pm_ops lpass_hm_pm_ops = {
+       SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL)
+};
+
+static struct platform_driver lpass_hm_sc7180_driver = {
+       .probe = lpass_hm_core_probe,
+       .driver = {
+               .name = "lpass_hm-sc7180",
+               .of_match_table = lpass_hm_sc7180_match_table,
+               .pm = &lpass_hm_pm_ops,
+       },
+};
+
+static int __init lpass_sc7180_init(void)
 {
-       return platform_driver_register(&lpass_core_cc_sc7180_driver);
+       int ret;
+
+       ret = platform_driver_register(&lpass_core_cc_sc7180_driver);
+       if (ret)
+               return ret;
+
+       ret = platform_driver_register(&lpass_hm_sc7180_driver);
+       if (ret) {
+               platform_driver_unregister(&lpass_core_cc_sc7180_driver);
+               return ret;
+       }
+
+       return 0;
 }
-subsys_initcall(lpass_core_cc_sc7180_init);
+subsys_initcall(lpass_sc7180_init);
 
-static void __exit lpass_core_cc_sc7180_exit(void)
+static void __exit lpass_sc7180_exit(void)
 {
+       platform_driver_unregister(&lpass_hm_sc7180_driver);
        platform_driver_unregister(&lpass_core_cc_sc7180_driver);
 }
-module_exit(lpass_core_cc_sc7180_exit);
+module_exit(lpass_sc7180_exit);
 
 MODULE_DESCRIPTION("QTI LPASS_CORE_CC SC7180 Driver");
 MODULE_LICENSE("GPL v2");
index 5f25a70..4146c1d 100644
@@ -121,7 +121,7 @@ sh73a0_cpg_register_clock(struct device_node *np, struct sh73a0_cpg *cpg,
                        (phy_no ? CPG_DSI1PHYCR : CPG_DSI0PHYCR);
 
                parent_name = phy_no ? "dsi1pck" : "dsi0pck";
-               mult = __raw_readl(dsi_reg);
+               mult = readl(dsi_reg);
                if (!(mult & 0x8000))
                        mult = 1;
                else
index fd54b9f..4a43ebe 100644
@@ -41,6 +41,7 @@ enum clk_ids {
        CLK_S2,
        CLK_S3,
        CLK_SDSRC,
+       CLK_RPCSRC,
        CLK_RINT,
 
        /* Module Clocks */
@@ -67,6 +68,12 @@ static const struct cpg_core_clk r8a774a1_core_clks[] __initconst = {
        DEF_FIXED(".s2",        CLK_S2,            CLK_PLL1_DIV2,  4, 1),
        DEF_FIXED(".s3",        CLK_S3,            CLK_PLL1_DIV2,  6, 1),
        DEF_FIXED(".sdsrc",     CLK_SDSRC,         CLK_PLL1_DIV2,  2, 1),
+       DEF_BASE(".rpcsrc",     CLK_RPCSRC, CLK_TYPE_GEN3_RPCSRC, CLK_PLL1),
+
+       DEF_BASE("rpc",         R8A774A1_CLK_RPC, CLK_TYPE_GEN3_RPC,
+                CLK_RPCSRC),
+       DEF_BASE("rpcd2",       R8A774A1_CLK_RPCD2, CLK_TYPE_GEN3_RPCD2,
+                R8A774A1_CLK_RPC),
 
        DEF_GEN3_OSC(".r",      CLK_RINT,          CLK_EXTAL,      32),
 
@@ -200,6 +207,7 @@ static const struct mssr_mod_clk r8a774a1_mod_clks[] __initconst = {
        DEF_MOD("can-fd",                914,   R8A774A1_CLK_S3D2),
        DEF_MOD("can-if1",               915,   R8A774A1_CLK_S3D4),
        DEF_MOD("can-if0",               916,   R8A774A1_CLK_S3D4),
+       DEF_MOD("rpc-if",                917,   R8A774A1_CLK_RPCD2),
        DEF_MOD("i2c6",                  918,   R8A774A1_CLK_S0D6),
        DEF_MOD("i2c5",                  919,   R8A774A1_CLK_S0D6),
        DEF_MOD("i2c-dvfs",              926,   R8A774A1_CLK_CP),
index f436691..6f04c40 100644
@@ -40,6 +40,7 @@ enum clk_ids {
        CLK_S2,
        CLK_S3,
        CLK_SDSRC,
+       CLK_RPCSRC,
        CLK_RINT,
 
        /* Module Clocks */
@@ -65,6 +66,12 @@ static const struct cpg_core_clk r8a774b1_core_clks[] __initconst = {
        DEF_FIXED(".s2",        CLK_S2,            CLK_PLL1_DIV2,  4, 1),
        DEF_FIXED(".s3",        CLK_S3,            CLK_PLL1_DIV2,  6, 1),
        DEF_FIXED(".sdsrc",     CLK_SDSRC,         CLK_PLL1_DIV2,  2, 1),
+       DEF_BASE(".rpcsrc",     CLK_RPCSRC, CLK_TYPE_GEN3_RPCSRC, CLK_PLL1),
+
+       DEF_BASE("rpc",         R8A774B1_CLK_RPC, CLK_TYPE_GEN3_RPC,
+                CLK_RPCSRC),
+       DEF_BASE("rpcd2",       R8A774B1_CLK_RPCD2, CLK_TYPE_GEN3_RPCD2,
+                R8A774B1_CLK_RPC),
 
        DEF_GEN3_OSC(".r",      CLK_RINT,          CLK_EXTAL,      32),
 
@@ -196,6 +203,7 @@ static const struct mssr_mod_clk r8a774b1_mod_clks[] __initconst = {
        DEF_MOD("can-fd",                914,   R8A774B1_CLK_S3D2),
        DEF_MOD("can-if1",               915,   R8A774B1_CLK_S3D4),
        DEF_MOD("can-if0",               916,   R8A774B1_CLK_S3D4),
+       DEF_MOD("rpc-if",                917,   R8A774B1_CLK_RPCD2),
        DEF_MOD("i2c6",                  918,   R8A774B1_CLK_S0D6),
        DEF_MOD("i2c5",                  919,   R8A774B1_CLK_S0D6),
        DEF_MOD("i2c-dvfs",              926,   R8A774B1_CLK_CP),
index 9fc9fa9..ed3a2cf 100644
@@ -44,6 +44,7 @@ enum clk_ids {
        CLK_S2,
        CLK_S3,
        CLK_SDSRC,
+       CLK_RPCSRC,
        CLK_RINT,
        CLK_OCO,
 
@@ -74,6 +75,13 @@ static const struct cpg_core_clk r8a774c0_core_clks[] __initconst = {
        DEF_FIXED(".s3",       CLK_S3,             CLK_PLL1,       6, 1),
        DEF_FIXED(".sdsrc",    CLK_SDSRC,          CLK_PLL1,       2, 1),
 
+       DEF_FIXED_RPCSRC_E3(".rpcsrc", CLK_RPCSRC, CLK_PLL0, CLK_PLL1),
+
+       DEF_BASE("rpc",         R8A774C0_CLK_RPC, CLK_TYPE_GEN3_RPC,
+                CLK_RPCSRC),
+       DEF_BASE("rpcd2",       R8A774C0_CLK_RPCD2, CLK_TYPE_GEN3_RPCD2,
+                R8A774C0_CLK_RPC),
+
        DEF_DIV6_RO(".r",      CLK_RINT,           CLK_EXTAL, CPG_RCKCR, 32),
 
        DEF_RATE(".oco",       CLK_OCO,            8 * 1000 * 1000),
@@ -199,6 +207,7 @@ static const struct mssr_mod_clk r8a774c0_mod_clks[] __initconst = {
        DEF_MOD("can-fd",                914,   R8A774C0_CLK_S3D2),
        DEF_MOD("can-if1",               915,   R8A774C0_CLK_S3D4),
        DEF_MOD("can-if0",               916,   R8A774C0_CLK_S3D4),
+       DEF_MOD("rpc-if",                917,   R8A774C0_CLK_RPCD2),
        DEF_MOD("i2c6",                  918,   R8A774C0_CLK_S3D2),
        DEF_MOD("i2c5",                  919,   R8A774C0_CLK_S3D2),
        DEF_MOD("i2c-dvfs",              926,   R8A774C0_CLK_CP),
index 17ebbac..aa5389b 100644
@@ -26,7 +26,6 @@
 #include <dt-bindings/clock/r8a779a0-cpg-mssr.h>
 
 #include "renesas-cpg-mssr.h"
-#include "rcar-gen3-cpg.h"
 
 enum rcar_r8a779a0_clk_types {
        CLK_TYPE_R8A779A0_MAIN = CLK_TYPE_CUSTOM,
@@ -84,6 +83,14 @@ enum clk_ids {
        DEF_BASE(_name, _id, CLK_TYPE_R8A779A0_PLL2X_3X, CLK_MAIN, \
                 .offset = _offset)
 
+#define DEF_MDSEL(_name, _id, _md, _parent0, _div0, _parent1, _div1) \
+       DEF_BASE(_name, _id, CLK_TYPE_R8A779A0_MDSEL,   \
+                (_parent0) << 16 | (_parent1),         \
+                .div = (_div0) << 16 | (_div1), .offset = _md)
+
+#define DEF_OSC(_name, _id, _parent, _div)             \
+       DEF_BASE(_name, _id, CLK_TYPE_R8A779A0_OSC, _parent, .div = _div)
+
 static const struct cpg_core_clk r8a779a0_core_clks[] __initconst = {
        /* External Clock Inputs */
        DEF_INPUT("extal",  CLK_EXTAL),
@@ -136,15 +143,51 @@ static const struct cpg_core_clk r8a779a0_core_clks[] __initconst = {
        DEF_DIV6P1("canfd",     R8A779A0_CLK_CANFD,     CLK_PLL5_DIV4,  0x878),
        DEF_DIV6P1("csi0",      R8A779A0_CLK_CSI0,      CLK_PLL5_DIV4,  0x880),
 
-       DEF_GEN3_OSC("osc",     R8A779A0_CLK_OSC,       CLK_EXTAL,      8),
-       DEF_GEN3_MDSEL("r",     R8A779A0_CLK_R, 29, CLK_EXTALR, 1, CLK_OCO, 1),
+       DEF_OSC("osc",          R8A779A0_CLK_OSC,       CLK_EXTAL,      8),
+       DEF_MDSEL("r",          R8A779A0_CLK_R, 29, CLK_EXTALR, 1, CLK_OCO, 1),
 };
 
 static const struct mssr_mod_clk r8a779a0_mod_clks[] __initconst = {
+       DEF_MOD("csi40",        331,    R8A779A0_CLK_CSI0),
+       DEF_MOD("csi41",        400,    R8A779A0_CLK_CSI0),
+       DEF_MOD("csi42",        401,    R8A779A0_CLK_CSI0),
+       DEF_MOD("csi43",        402,    R8A779A0_CLK_CSI0),
        DEF_MOD("scif0",        702,    R8A779A0_CLK_S1D8),
        DEF_MOD("scif1",        703,    R8A779A0_CLK_S1D8),
        DEF_MOD("scif3",        704,    R8A779A0_CLK_S1D8),
        DEF_MOD("scif4",        705,    R8A779A0_CLK_S1D8),
+       DEF_MOD("vin00",        730,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin01",        731,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin02",        800,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin03",        801,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin04",        802,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin05",        803,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin06",        804,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin07",        805,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin10",        806,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin11",        807,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin12",        808,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin13",        809,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin14",        810,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin15",        811,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin16",        812,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin17",        813,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin20",        814,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin21",        815,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin22",        816,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin23",        817,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin24",        818,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin25",        819,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin26",        820,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin27",        821,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin30",        822,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin31",        823,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin32",        824,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin33",        825,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin34",        826,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin35",        827,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin36",        828,    R8A779A0_CLK_S1D1),
+       DEF_MOD("vin37",        829,    R8A779A0_CLK_S1D1),
 };
 
 static spinlock_t cpg_lock;
@@ -153,7 +196,7 @@ static const struct rcar_r8a779a0_cpg_pll_config *cpg_pll_config __initdata;
 static unsigned int cpg_clk_extalr __initdata;
 static u32 cpg_mode __initdata;
 
-struct clk * __init rcar_r8a779a0_cpg_clk_register(struct device *dev,
+static struct clk * __init rcar_r8a779a0_cpg_clk_register(struct device *dev,
        const struct cpg_core_clk *core, const struct cpg_mssr_info *info,
        struct clk **clks, void __iomem *base,
        struct raw_notifier_head *notifiers)
index 488f8b3..063b611 100644
@@ -224,10 +224,9 @@ static struct clk * __init cpg_z_clk_register(const char *name,
 #define CPG_SD_STP_MASK                (CPG_SD_STP_HCK | CPG_SD_STP_CK)
 #define CPG_SD_FC_MASK         (0x7 << 2 | 0x3 << 0)
 
-#define CPG_SD_DIV_TABLE_DATA(stp_hck, stp_ck, sd_srcfc, sd_fc, sd_div) \
+#define CPG_SD_DIV_TABLE_DATA(stp_hck, sd_srcfc, sd_fc, sd_div) \
 { \
        .val = ((stp_hck) ? CPG_SD_STP_HCK : 0) | \
-              ((stp_ck) ? CPG_SD_STP_CK : 0) | \
               ((sd_srcfc) << 2) | \
               ((sd_fc) << 0), \
        .div = (sd_div), \
@@ -247,36 +246,36 @@ struct sd_clock {
 };
 
 /* SDn divider
- *                     sd_srcfc   sd_fc   div
- * stp_hck   stp_ck    (div)      (div)     = sd_srcfc x sd_fc
- *-------------------------------------------------------------------
- *  0         0         0 (1)      1 (4)      4 : SDR104 / HS200 / HS400 (8 TAP)
- *  0         0         1 (2)      1 (4)      8 : SDR50
- *  1         0         2 (4)      1 (4)     16 : HS / SDR25
- *  1         0         3 (8)      1 (4)     32 : NS / SDR12
- *  1         0         4 (16)     1 (4)     64
- *  0         0         0 (1)      0 (2)      2
- *  0         0         1 (2)      0 (2)      4 : SDR104 / HS200 / HS400 (4 TAP)
- *  1         0         2 (4)      0 (2)      8
- *  1         0         3 (8)      0 (2)     16
- *  1         0         4 (16)     0 (2)     32
+ *           sd_srcfc   sd_fc   div
+ * stp_hck   (div)      (div)     = sd_srcfc x sd_fc
+ *---------------------------------------------------------
+ *  0         0 (1)      1 (4)      4 : SDR104 / HS200 / HS400 (8 TAP)
+ *  0         1 (2)      1 (4)      8 : SDR50
+ *  1         2 (4)      1 (4)     16 : HS / SDR25
+ *  1         3 (8)      1 (4)     32 : NS / SDR12
+ *  1         4 (16)     1 (4)     64
+ *  0         0 (1)      0 (2)      2
+ *  0         1 (2)      0 (2)      4 : SDR104 / HS200 / HS400 (4 TAP)
+ *  1         2 (4)      0 (2)      8
+ *  1         3 (8)      0 (2)     16
+ *  1         4 (16)     0 (2)     32
  *
  *  NOTE: There is a quirk option to ignore the first row of the dividers
  *  table when searching for suitable settings. This is because HS400 on
  *  early ES versions of H3 and M3-W requires a specific setting to work.
  */
 static const struct sd_div_table cpg_sd_div_table[] = {
-/*     CPG_SD_DIV_TABLE_DATA(stp_hck,  stp_ck,   sd_srcfc,   sd_fc,  sd_div) */
-       CPG_SD_DIV_TABLE_DATA(0,        0,        0,          1,        4),
-       CPG_SD_DIV_TABLE_DATA(0,        0,        1,          1,        8),
-       CPG_SD_DIV_TABLE_DATA(1,        0,        2,          1,       16),
-       CPG_SD_DIV_TABLE_DATA(1,        0,        3,          1,       32),
-       CPG_SD_DIV_TABLE_DATA(1,        0,        4,          1,       64),
-       CPG_SD_DIV_TABLE_DATA(0,        0,        0,          0,        2),
-       CPG_SD_DIV_TABLE_DATA(0,        0,        1,          0,        4),
-       CPG_SD_DIV_TABLE_DATA(1,        0,        2,          0,        8),
-       CPG_SD_DIV_TABLE_DATA(1,        0,        3,          0,       16),
-       CPG_SD_DIV_TABLE_DATA(1,        0,        4,          0,       32),
+/*     CPG_SD_DIV_TABLE_DATA(stp_hck,  sd_srcfc,   sd_fc,  sd_div) */
+       CPG_SD_DIV_TABLE_DATA(0,        0,          1,        4),
+       CPG_SD_DIV_TABLE_DATA(0,        1,          1,        8),
+       CPG_SD_DIV_TABLE_DATA(1,        2,          1,       16),
+       CPG_SD_DIV_TABLE_DATA(1,        3,          1,       32),
+       CPG_SD_DIV_TABLE_DATA(1,        4,          1,       64),
+       CPG_SD_DIV_TABLE_DATA(0,        0,          0,        2),
+       CPG_SD_DIV_TABLE_DATA(0,        1,          0,        4),
+       CPG_SD_DIV_TABLE_DATA(1,        2,          0,        8),
+       CPG_SD_DIV_TABLE_DATA(1,        3,          0,       16),
+       CPG_SD_DIV_TABLE_DATA(1,        4,          0,       32),
 };
 
 #define to_sd_clock(_hw) container_of(_hw, struct sd_clock, hw)
@@ -696,6 +695,34 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
                                                  cpg_rpcsrc_div_table,
                                                  &cpg_lock);
 
+       case CLK_TYPE_GEN3_E3_RPCSRC:
+               /*
+                * Register RPCSRC as a fixed factor clock based on the
+                * MD[4:1] pins and the CPG_RPCCKCR[4:3] register value,
+                * which has been set prior to booting the kernel.
+                */
+               value = (readl(base + CPG_RPCCKCR) & GENMASK(4, 3)) >> 3;
+
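+               /*
+                * CPG_RPCCKCR[4:3]: 0 selects /5, 1 selects /3, 3 selects /2;
+                * 2 uses the parent and divider packed by DEF_FIXED_RPCSRC_E3.
+                */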
+               switch (value) {
+               case 0:
+                       div = 5;
+                       break;
+               case 1:
+                       div = 3;
+                       break;
+               case 2:
+                       parent = clks[core->parent >> 16];
+                       if (IS_ERR(parent))
+                               return ERR_CAST(parent);
+                       div = core->div;
+                       break;
+               case 3:
+               default:
+                       div = 2;
+                       break;
+               }
+               break;
+
        case CLK_TYPE_GEN3_RPC:
                return cpg_rpc_clk_register(core->name, base,
                                            __clk_get_name(parent), notifiers);
index c4ac80c..3d949c4 100644
@@ -24,6 +24,7 @@ enum rcar_gen3_clk_types {
        CLK_TYPE_GEN3_OSC,      /* OSC EXTAL predivider and fixed divider */
        CLK_TYPE_GEN3_RCKSEL,   /* Select parent/divider using RCKCR.CKSEL */
        CLK_TYPE_GEN3_RPCSRC,
+       CLK_TYPE_GEN3_E3_RPCSRC,
        CLK_TYPE_GEN3_RPC,
        CLK_TYPE_GEN3_RPCD2,
 
@@ -54,6 +55,10 @@ enum rcar_gen3_clk_types {
 #define DEF_GEN3_Z(_name, _id, _type, _parent, _div, _offset)  \
        DEF_BASE(_name, _id, _type, _parent, .div = _div, .offset = _offset)
 
+#define DEF_FIXED_RPCSRC_E3(_name, _id, _parent0, _parent1)    \
+       DEF_BASE(_name, _id, CLK_TYPE_GEN3_E3_RPCSRC,   \
+                (_parent0) << 16 | (_parent1), .div = 8)
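+/* _parent0 is packed into the upper 16 bits; the E3 handler unpacks it via core->parent >> 16 */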
+
 struct rcar_gen3_cpg_pll_config {
        u8 extal_div;
        u8 pll1_mult;
index d4c0298..3abafd7 100644
@@ -160,7 +160,7 @@ static int rcar_usb2_clock_sel_probe(struct platform_device *pdev)
        if (ret < 0)
                return ret;
 
-       priv->rsts = devm_reset_control_array_get(dev, true, false);
+       priv->rsts = devm_reset_control_array_get_shared(dev);
        if (IS_ERR(priv->rsts))
                return PTR_ERR(priv->rsts);
 
index 94db883..1c3215d 100644
@@ -119,7 +119,8 @@ static const u16 srstclr_for_v3u[] = {
 };
 
 /**
- * Clock Pulse Generator / Module Standby and Software Reset Private Data
+ * struct cpg_mssr_priv - Clock Pulse Generator / Module Standby
+ *                        and Software Reset Private Data
  *
  * @rcdev: Optional reset controller entity
  * @dev: CPG/MSSR device
index 47cd6c5..effd050 100644
@@ -11,67 +11,77 @@ config COMMON_CLK_ROCKCHIP
 if COMMON_CLK_ROCKCHIP
 config CLK_PX30
        bool "Rockchip PX30 clock controller support"
+       depends on (ARM64 || COMPILE_TEST)
        default y
        help
          Build the driver for PX30 Clock Driver.
 
 config CLK_RV110X
        bool "Rockchip RV110x clock controller support"
+       depends on (ARM || COMPILE_TEST)
        default y
        help
          Build the driver for RV110x Clock Driver.
 
 config CLK_RK3036
        bool "Rockchip RK3036 clock controller support"
+       depends on (ARM || COMPILE_TEST)
        default y
        help
          Build the driver for RK3036 Clock Driver.
 
 config CLK_RK312X
        bool "Rockchip RK312x clock controller support"
+       depends on (ARM || COMPILE_TEST)
        default y
        help
          Build the driver for RK312x Clock Driver.
 
 config CLK_RK3188
        bool "Rockchip RK3188 clock controller support"
+       depends on (ARM || COMPILE_TEST)
        default y
        help
          Build the driver for RK3188 Clock Driver.
 
 config CLK_RK322X
        bool "Rockchip RK322x clock controller support"
+       depends on (ARM || COMPILE_TEST)
        default y
        help
          Build the driver for RK322x Clock Driver.
 
 config CLK_RK3288
        bool "Rockchip RK3288 clock controller support"
-       depends on ARM
+       depends on (ARM || COMPILE_TEST)
        default y
        help
          Build the driver for RK3288 Clock Driver.
 
 config CLK_RK3308
        bool "Rockchip RK3308 clock controller support"
+       depends on (ARM64 || COMPILE_TEST)
        default y
        help
          Build the driver for RK3308 Clock Driver.
 
 config CLK_RK3328
        bool "Rockchip RK3328 clock controller support"
+       depends on (ARM64 || COMPILE_TEST)
        default y
        help
          Build the driver for RK3328 Clock Driver.
 
 config CLK_RK3368
        bool "Rockchip RK3368 clock controller support"
+       depends on (ARM64 || COMPILE_TEST)
        default y
        help
          Build the driver for RK3368 Clock Driver.
 
 config CLK_RK3399
        tristate "Rockchip RK3399 clock controller support"
+       depends on (ARM64 || COMPILE_TEST)
        default y
        help
          Build the driver for RK3399 Clock Driver.
index 730020f..0b76ad3 100644
@@ -255,19 +255,19 @@ static struct rockchip_clk_branch common_spdif_fracmux __initdata =
                        RK2928_CLKSEL_CON(5), 8, 2, MFLAGS);
 
 static struct rockchip_clk_branch common_uart0_fracmux __initdata =
-       MUX(SCLK_UART0, "sclk_uart0", mux_sclk_uart0_p, 0,
+       MUX(SCLK_UART0, "sclk_uart0", mux_sclk_uart0_p, CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(13), 8, 2, MFLAGS);
 
 static struct rockchip_clk_branch common_uart1_fracmux __initdata =
-       MUX(SCLK_UART1, "sclk_uart1", mux_sclk_uart1_p, 0,
+       MUX(SCLK_UART1, "sclk_uart1", mux_sclk_uart1_p, CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(14), 8, 2, MFLAGS);
 
 static struct rockchip_clk_branch common_uart2_fracmux __initdata =
-       MUX(SCLK_UART2, "sclk_uart2", mux_sclk_uart2_p, 0,
+       MUX(SCLK_UART2, "sclk_uart2", mux_sclk_uart2_p, CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(15), 8, 2, MFLAGS);
 
 static struct rockchip_clk_branch common_uart3_fracmux __initdata =
-       MUX(SCLK_UART3, "sclk_uart3", mux_sclk_uart3_p, 0,
+       MUX(SCLK_UART3, "sclk_uart3", mux_sclk_uart3_p, CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(16), 8, 2, MFLAGS);
 
 static struct rockchip_clk_branch common_clk_branches[] __initdata = {
@@ -408,28 +408,28 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = {
        COMPOSITE_NOMUX(0, "uart0_pre", "uart_src", 0,
                        RK2928_CLKSEL_CON(13), 0, 7, DFLAGS,
                        RK2928_CLKGATE_CON(1), 8, GFLAGS),
-       COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_pre", 0,
+       COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_pre", CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(17), 0,
                        RK2928_CLKGATE_CON(1), 9, GFLAGS,
                        &common_uart0_fracmux),
        COMPOSITE_NOMUX(0, "uart1_pre", "uart_src", 0,
                        RK2928_CLKSEL_CON(14), 0, 7, DFLAGS,
                        RK2928_CLKGATE_CON(1), 10, GFLAGS),
-       COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_pre", 0,
+       COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_pre", CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(18), 0,
                        RK2928_CLKGATE_CON(1), 11, GFLAGS,
                        &common_uart1_fracmux),
        COMPOSITE_NOMUX(0, "uart2_pre", "uart_src", 0,
                        RK2928_CLKSEL_CON(15), 0, 7, DFLAGS,
                        RK2928_CLKGATE_CON(1), 12, GFLAGS),
-       COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_pre", 0,
+       COMPOSITE_FRACMUX(0, "uart2_frac", "uart2_pre", CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(19), 0,
                        RK2928_CLKGATE_CON(1), 13, GFLAGS,
                        &common_uart2_fracmux),
        COMPOSITE_NOMUX(0, "uart3_pre", "uart_src", 0,
                        RK2928_CLKSEL_CON(16), 0, 7, DFLAGS,
                        RK2928_CLKGATE_CON(1), 14, GFLAGS),
-       COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_pre", 0,
+       COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_pre", CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(20), 0,
                        RK2928_CLKGATE_CON(1), 15, GFLAGS,
                        &common_uart3_fracmux),
@@ -449,7 +449,6 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = {
 
        /* hclk_cpu gates */
        GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", 0, RK2928_CLKGATE_CON(5), 6, GFLAGS),
-       GATE(HCLK_I2S0, "hclk_i2s0", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS),
        GATE(HCLK_SPDIF, "hclk_spdif", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 1, GFLAGS),
        GATE(0, "hclk_cpubus", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 8, GFLAGS),
        /* hclk_ahb2apb is part of a clk branch */
@@ -543,15 +542,15 @@ static struct clk_div_table div_aclk_cpu_t[] = {
 };
 
 static struct rockchip_clk_branch rk3066a_i2s0_fracmux __initdata =
-       MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, 0,
+       MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(2), 8, 2, MFLAGS);
 
 static struct rockchip_clk_branch rk3066a_i2s1_fracmux __initdata =
-       MUX(SCLK_I2S1, "sclk_i2s1", mux_sclk_i2s1_p, 0,
+       MUX(SCLK_I2S1, "sclk_i2s1", mux_sclk_i2s1_p, CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(3), 8, 2, MFLAGS);
 
 static struct rockchip_clk_branch rk3066a_i2s2_fracmux __initdata =
-       MUX(SCLK_I2S2, "sclk_i2s2", mux_sclk_i2s2_p, 0,
+       MUX(SCLK_I2S2, "sclk_i2s2", mux_sclk_i2s2_p, CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(4), 8, 2, MFLAGS);
 
 static struct rockchip_clk_branch rk3066a_clk_branches[] __initdata = {
@@ -615,27 +614,28 @@ static struct rockchip_clk_branch rk3066a_clk_branches[] __initdata = {
        COMPOSITE_NOMUX(0, "i2s0_pre", "i2s_src", 0,
                        RK2928_CLKSEL_CON(2), 0, 7, DFLAGS,
                        RK2928_CLKGATE_CON(0), 7, GFLAGS),
-       COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", 0,
+       COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(6), 0,
                        RK2928_CLKGATE_CON(0), 8, GFLAGS,
                        &rk3066a_i2s0_fracmux),
        COMPOSITE_NOMUX(0, "i2s1_pre", "i2s_src", 0,
                        RK2928_CLKSEL_CON(3), 0, 7, DFLAGS,
                        RK2928_CLKGATE_CON(0), 9, GFLAGS),
-       COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_pre", 0,
+       COMPOSITE_FRACMUX(0, "i2s1_frac", "i2s1_pre", CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(7), 0,
                        RK2928_CLKGATE_CON(0), 10, GFLAGS,
                        &rk3066a_i2s1_fracmux),
        COMPOSITE_NOMUX(0, "i2s2_pre", "i2s_src", 0,
                        RK2928_CLKSEL_CON(4), 0, 7, DFLAGS,
                        RK2928_CLKGATE_CON(0), 11, GFLAGS),
-       COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_pre", 0,
+       COMPOSITE_FRACMUX(0, "i2s2_frac", "i2s2_pre", CLK_SET_RATE_PARENT,
                        RK2928_CLKSEL_CON(8), 0,
                        RK2928_CLKGATE_CON(0), 12, GFLAGS,
                        &rk3066a_i2s2_fracmux),
 
-       GATE(HCLK_I2S1, "hclk_i2s1", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS),
-       GATE(HCLK_I2S2, "hclk_i2s2", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS),
+       GATE(HCLK_I2S0, "hclk_i2s0", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS),
+       GATE(HCLK_I2S1, "hclk_i2s1", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS),
+       GATE(HCLK_I2S2, "hclk_i2s2", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS),
        GATE(HCLK_CIF1, "hclk_cif1", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 6, GFLAGS),
        GATE(HCLK_HDMI, "hclk_hdmi", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS),
 
@@ -728,6 +728,7 @@ static struct rockchip_clk_branch rk3188_clk_branches[] __initdata = {
                        RK2928_CLKGATE_CON(0), 10, GFLAGS,
                        &rk3188_i2s0_fracmux),
 
+       GATE(HCLK_I2S0, "hclk_i2s0", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 2, GFLAGS),
        GATE(0, "hclk_imem0", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS),
        GATE(0, "hclk_imem1", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 15, GFLAGS),
 
index b443169..336481b 100644 (file)
@@ -603,8 +603,7 @@ void rockchip_clk_protect_critical(const char *const clocks[],
        for (i = 0; i < nclocks; i++) {
                struct clk *clk = __clk_lookup(clocks[i]);
 
-               if (clk)
-                       clk_prepare_enable(clk);
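+               /* clk_prepare_enable() treats a NULL clk as a no-op and returns 0 */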
+               clk_prepare_enable(clk);
        }
 }
 EXPORT_SYMBOL_GPL(rockchip_clk_protect_critical);
index 7e9c186..0441c4f 100644 (file)
@@ -2,10 +2,73 @@
 # Recent Exynos platforms should just select COMMON_CLK_SAMSUNG:
 config COMMON_CLK_SAMSUNG
        bool "Samsung Exynos clock controller support" if COMPILE_TEST
-       # Clocks on ARM64 SoCs (e.g. Exynos5433, Exynos7) are chosen by
-       # EXYNOS_ARM64_COMMON_CLK to avoid building them on ARMv7:
+       select S3C64XX_COMMON_CLK if ARM && ARCH_S3C64XX
+       select S5PV210_COMMON_CLK if ARM && ARCH_S5PV210
+       select EXYNOS_3250_COMMON_CLK if ARM && SOC_EXYNOS3250
+       select EXYNOS_4_COMMON_CLK if ARM && ARCH_EXYNOS4
+       select EXYNOS_5250_COMMON_CLK if ARM && SOC_EXYNOS5250
+       select EXYNOS_5260_COMMON_CLK if ARM && SOC_EXYNOS5260
+       select EXYNOS_5410_COMMON_CLK if ARM && SOC_EXYNOS5410
+       select EXYNOS_5420_COMMON_CLK if ARM && SOC_EXYNOS5420
        select EXYNOS_ARM64_COMMON_CLK if ARM64 && ARCH_EXYNOS
 
+config S3C64XX_COMMON_CLK
+       bool "Samsung S3C64xx clock controller support" if COMPILE_TEST
+       depends on COMMON_CLK_SAMSUNG
+       help
+         Support for the clock controller present on the Samsung S3C64xx SoCs.
+         Choose Y here only if you build for this SoC.
+
+config S5PV210_COMMON_CLK
+       bool "Samsung S5Pv210 clock controller support" if COMPILE_TEST
+       depends on COMMON_CLK_SAMSUNG
+       help
+         Support for the clock controller present on the Samsung S5Pv210 SoCs.
+         Choose Y here only if you build for this SoC.
+
+config EXYNOS_3250_COMMON_CLK
+       bool "Samsung Exynos3250 clock controller support" if COMPILE_TEST
+       depends on COMMON_CLK_SAMSUNG
+       help
+         Support for the clock controller present on the Samsung
+         Exynos3250 SoCs. Choose Y here only if you build for this SoC.
+
+config EXYNOS_4_COMMON_CLK
+       bool "Samsung Exynos4 clock controller support" if COMPILE_TEST
+       depends on COMMON_CLK_SAMSUNG
+       help
+         Support for the clock controller present on the Samsung
+         Exynos4212 and Exynos4412 SoCs. Choose Y here only if you build for
+         these SoCs.
+
+config EXYNOS_5250_COMMON_CLK
+       bool "Samsung Exynos5250 clock controller support" if COMPILE_TEST
+       depends on COMMON_CLK_SAMSUNG
+       help
+         Support for the clock controller present on the Samsung
+         Exynos5250 SoCs. Choose Y here only if you build for this SoC.
+
+config EXYNOS_5260_COMMON_CLK
+       bool "Samsung Exynos5260 clock controller support" if COMPILE_TEST
+       depends on COMMON_CLK_SAMSUNG
+       help
+         Support for the clock controller present on the Samsung
+         Exynos5260 SoCs. Choose Y here only if you build for this SoC.
+
+config EXYNOS_5410_COMMON_CLK
+       bool "Samsung Exynos5410 clock controller support" if COMPILE_TEST
+       depends on COMMON_CLK_SAMSUNG
+       help
+         Support for the clock controller present on the Samsung
+         Exynos5410 SoCs. Choose Y here only if you build for this SoC.
+
+config EXYNOS_5420_COMMON_CLK
+       bool "Samsung Exynos5420 clock controller support" if COMPILE_TEST
+       depends on COMMON_CLK_SAMSUNG
+       help
+         Support for the clock controller present on the Samsung
+         Exynos5420 SoCs. Choose Y here only if you build for this SoC.
+
 config EXYNOS_ARM64_COMMON_CLK
        bool "Samsung Exynos ARMv8-family clock controller support" if COMPILE_TEST
        depends on COMMON_CLK_SAMSUNG
index 6891b08..028b2e2 100644 (file)
@@ -4,15 +4,15 @@
 #
 
 obj-$(CONFIG_COMMON_CLK)       += clk.o clk-pll.o clk-cpu.o
-obj-$(CONFIG_SOC_EXYNOS3250)   += clk-exynos3250.o
-obj-$(CONFIG_ARCH_EXYNOS4)     += clk-exynos4.o
-obj-$(CONFIG_ARCH_EXYNOS4)     += clk-exynos4412-isp.o
-obj-$(CONFIG_SOC_EXYNOS5250)   += clk-exynos5250.o
-obj-$(CONFIG_SOC_EXYNOS5250)   += clk-exynos5-subcmu.o
-obj-$(CONFIG_SOC_EXYNOS5260)   += clk-exynos5260.o
-obj-$(CONFIG_SOC_EXYNOS5410)   += clk-exynos5410.o
-obj-$(CONFIG_SOC_EXYNOS5420)   += clk-exynos5420.o
-obj-$(CONFIG_SOC_EXYNOS5420)   += clk-exynos5-subcmu.o
+obj-$(CONFIG_EXYNOS_3250_COMMON_CLK)   += clk-exynos3250.o
+obj-$(CONFIG_EXYNOS_4_COMMON_CLK)      += clk-exynos4.o
+obj-$(CONFIG_EXYNOS_4_COMMON_CLK)      += clk-exynos4412-isp.o
+obj-$(CONFIG_EXYNOS_5250_COMMON_CLK)   += clk-exynos5250.o
+obj-$(CONFIG_EXYNOS_5250_COMMON_CLK)   += clk-exynos5-subcmu.o
+obj-$(CONFIG_EXYNOS_5260_COMMON_CLK)   += clk-exynos5260.o
+obj-$(CONFIG_EXYNOS_5410_COMMON_CLK)   += clk-exynos5410.o
+obj-$(CONFIG_EXYNOS_5420_COMMON_CLK)   += clk-exynos5420.o
+obj-$(CONFIG_EXYNOS_5420_COMMON_CLK)   += clk-exynos5-subcmu.o
 obj-$(CONFIG_EXYNOS_ARM64_COMMON_CLK)  += clk-exynos5433.o
 obj-$(CONFIG_EXYNOS_AUDSS_CLK_CON) += clk-exynos-audss.o
 obj-$(CONFIG_EXYNOS_CLKOUT)    += clk-exynos-clkout.o
@@ -21,5 +21,5 @@ obj-$(CONFIG_S3C2410_COMMON_CLK)+= clk-s3c2410.o
 obj-$(CONFIG_S3C2410_COMMON_DCLK)+= clk-s3c2410-dclk.o
 obj-$(CONFIG_S3C2412_COMMON_CLK)+= clk-s3c2412.o
 obj-$(CONFIG_S3C2443_COMMON_CLK)+= clk-s3c2443.o
-obj-$(CONFIG_ARCH_S3C64XX)     += clk-s3c64xx.o
-obj-$(CONFIG_ARCH_S5PV210)     += clk-s5pv210.o clk-s5pv210-audss.o
+obj-$(CONFIG_S3C64XX_COMMON_CLK)       += clk-s3c64xx.o
+obj-$(CONFIG_S5PV210_COMMON_CLK)       += clk-s5pv210.o clk-s5pv210-audss.o
index ac70ad7..5873a93 100644 (file)
@@ -8,14 +8,17 @@
 
 #include <linux/errno.h>
 #include <linux/hrtimer.h>
+#include <linux/iopoll.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/timekeeping.h>
 #include <linux/clk-provider.h>
 #include <linux/io.h>
 #include "clk.h"
 #include "clk-pll.h"
 
-#define PLL_TIMEOUT_MS         10
+#define PLL_TIMEOUT_US         20000U
+#define PLL_TIMEOUT_LOOPS      1000000U
 
 struct samsung_clk_pll {
        struct clk_hw           hw;
@@ -63,6 +66,53 @@ static long samsung_pll_round_rate(struct clk_hw *hw,
        return rate_table[i - 1].rate;
 }
 
+static bool pll_early_timeout = true;
+
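+/*
+ * By arch_initcall time the clocksource is expected to be registered, so the
+ * timekeeping-based poll in samsung_pll_lock_wait() becomes safe to use.
+ */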
+static int __init samsung_pll_disable_early_timeout(void)
+{
+       pll_early_timeout = false;
+       return 0;
+}
+arch_initcall(samsung_pll_disable_early_timeout);
+
+/* Wait until the PLL is locked */
+static int samsung_pll_lock_wait(struct samsung_clk_pll *pll,
+                                unsigned int reg_mask)
+{
+       int i, ret;
+       u32 val;
+
+       /*
+        * This function might be called when the timekeeping API can't be used
+        * to detect timeouts. One such situation is when the clocksource is
+        * not yet initialized; another is when timekeeping is suspended.
+        * udelay() also cannot be used on arm64 when the clocksource is not
+        * running, since the current timer serves as the cycle counter there.
+        * So a simple busy loop is used in those special cases. The iteration
+        * limit was derived from experimental measurements of various PLLs on
+        * multiple Exynos SoC variants: a single register read usually took
+        * 0.4-1.5 us, and never less than 0.4 us.
+        */
+       if (pll_early_timeout || timekeeping_suspended) {
+               i = PLL_TIMEOUT_LOOPS;
+               while (i-- > 0) {
+                       if (readl_relaxed(pll->con_reg) & reg_mask)
+                               return 0;
+
+                       cpu_relax();
+               }
+               ret = -ETIMEDOUT;
+       } else {
+               ret = readl_relaxed_poll_timeout_atomic(pll->con_reg, val,
+                                       val & reg_mask, 0, PLL_TIMEOUT_US);
+       }
+
+       if (ret < 0)
+               pr_err("Could not lock PLL %s\n", clk_hw_get_name(&pll->hw));
+
+       return ret;
+}
+
 static int samsung_pll3xxx_enable(struct clk_hw *hw)
 {
        struct samsung_clk_pll *pll = to_clk_pll(hw);
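
For reference, readl_relaxed_poll_timeout_atomic() above comes from
<linux/iopoll.h>. A minimal standalone sketch of the same pattern follows;
the status_reg pointer and STATUS_READY mask are hypothetical names, not part
of this driver:

	#include <linux/bits.h>
	#include <linux/iopoll.h>

	#define STATUS_READY	BIT(0)	/* hypothetical ready bit */

	/* Busy-poll (no sleeping) until STATUS_READY is set, or fail after
	 * 20 ms. val receives each read; delay_us = 0 means poll back to
	 * back; timeout_us = 20000 caps the total wait in microseconds. */
	static int wait_for_ready(void __iomem *status_reg)
	{
		u32 val;

		return readl_relaxed_poll_timeout_atomic(status_reg, val,
							 val & STATUS_READY,
							 0, 20000);
	}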
@@ -72,13 +122,7 @@ static int samsung_pll3xxx_enable(struct clk_hw *hw)
        tmp |= BIT(pll->enable_offs);
        writel_relaxed(tmp, pll->con_reg);
 
-       /* wait lock time */
-       do {
-               cpu_relax();
-               tmp = readl_relaxed(pll->con_reg);
-       } while (!(tmp & BIT(pll->lock_offs)));
-
-       return 0;
+       return samsung_pll_lock_wait(pll, BIT(pll->lock_offs));
 }
 
 static void samsung_pll3xxx_disable(struct clk_hw *hw)
@@ -240,13 +284,10 @@ static int samsung_pll35xx_set_rate(struct clk_hw *hw, unsigned long drate,
                        (rate->sdiv << PLL35XX_SDIV_SHIFT);
        writel_relaxed(tmp, pll->con_reg);
 
-       /* Wait until the PLL is locked if it is enabled. */
-       if (tmp & BIT(pll->enable_offs)) {
-               do {
-                       cpu_relax();
-                       tmp = readl_relaxed(pll->con_reg);
-               } while (!(tmp & BIT(pll->lock_offs)));
-       }
+       /* Wait for PLL lock if the PLL is enabled */
+       if (tmp & BIT(pll->enable_offs))
+               return samsung_pll_lock_wait(pll, BIT(pll->lock_offs));
+
        return 0;
 }
 
@@ -318,7 +359,7 @@ static int samsung_pll36xx_set_rate(struct clk_hw *hw, unsigned long drate,
                                        unsigned long parent_rate)
 {
        struct samsung_clk_pll *pll = to_clk_pll(hw);
-       u32 tmp, pll_con0, pll_con1;
+       u32 pll_con0, pll_con1;
        const struct samsung_pll_rate_table *rate;
 
        rate = samsung_get_pll_settings(pll, drate);
@@ -356,13 +397,8 @@ static int samsung_pll36xx_set_rate(struct clk_hw *hw, unsigned long drate,
        pll_con1 |= rate->kdiv << PLL36XX_KDIV_SHIFT;
        writel_relaxed(pll_con1, pll->con_reg + 4);
 
-       /* wait_lock_time */
-       if (pll_con0 & BIT(pll->enable_offs)) {
-               do {
-                       cpu_relax();
-                       tmp = readl_relaxed(pll->con_reg);
-               } while (!(tmp & BIT(pll->lock_offs)));
-       }
+       if (pll_con0 & BIT(pll->enable_offs))
+               return samsung_pll_lock_wait(pll, BIT(pll->lock_offs));
 
        return 0;
 }
@@ -437,7 +473,6 @@ static int samsung_pll45xx_set_rate(struct clk_hw *hw, unsigned long drate,
        struct samsung_clk_pll *pll = to_clk_pll(hw);
        const struct samsung_pll_rate_table *rate;
        u32 con0, con1;
-       ktime_t start;
 
        /* Get required rate settings from table */
        rate = samsung_get_pll_settings(pll, drate);
@@ -488,21 +523,8 @@ static int samsung_pll45xx_set_rate(struct clk_hw *hw, unsigned long drate,
        writel_relaxed(con1, pll->con_reg + 0x4);
        writel_relaxed(con0, pll->con_reg);
 
-       /* Wait for locking. */
-       start = ktime_get();
-       while (!(readl_relaxed(pll->con_reg) & PLL45XX_LOCKED)) {
-               ktime_t delta = ktime_sub(ktime_get(), start);
-
-               if (ktime_to_ms(delta) > PLL_TIMEOUT_MS) {
-                       pr_err("%s: could not lock PLL %s\n",
-                                       __func__, clk_hw_get_name(hw));
-                       return -EFAULT;
-               }
-
-               cpu_relax();
-       }
-
-       return 0;
+       /* Wait for PLL lock */
+       return samsung_pll_lock_wait(pll, PLL45XX_LOCKED);
 }
 
 static const struct clk_ops samsung_pll45xx_clk_ops = {
@@ -588,7 +610,6 @@ static int samsung_pll46xx_set_rate(struct clk_hw *hw, unsigned long drate,
        struct samsung_clk_pll *pll = to_clk_pll(hw);
        const struct samsung_pll_rate_table *rate;
        u32 con0, con1, lock;
-       ktime_t start;
 
        /* Get required rate settings from table */
        rate = samsung_get_pll_settings(pll, drate);
@@ -647,21 +668,8 @@ static int samsung_pll46xx_set_rate(struct clk_hw *hw, unsigned long drate,
        writel_relaxed(con0, pll->con_reg);
        writel_relaxed(con1, pll->con_reg + 0x4);
 
-       /* Wait for locking. */
-       start = ktime_get();
-       while (!(readl_relaxed(pll->con_reg) & PLL46XX_LOCKED)) {
-               ktime_t delta = ktime_sub(ktime_get(), start);
-
-               if (ktime_to_ms(delta) > PLL_TIMEOUT_MS) {
-                       pr_err("%s: could not lock PLL %s\n",
-                                       __func__, clk_hw_get_name(hw));
-                       return -EFAULT;
-               }
-
-               cpu_relax();
-       }
-
-       return 0;
+       /* Wait for PLL lock */
+       return samsung_pll_lock_wait(pll, PLL46XX_LOCKED);
 }
 
 static const struct clk_ops samsung_pll46xx_clk_ops = {
@@ -1035,14 +1043,9 @@ static int samsung_pll2550xx_set_rate(struct clk_hw *hw, unsigned long drate,
                        (rate->sdiv << PLL2550XX_S_SHIFT);
        writel_relaxed(tmp, pll->con_reg);
 
-       /* wait_lock_time */
-       do {
-               cpu_relax();
-               tmp = readl_relaxed(pll->con_reg);
-       } while (!(tmp & (PLL2550XX_LOCK_STAT_MASK
-                       << PLL2550XX_LOCK_STAT_SHIFT)));
-
-       return 0;
+       /* Wait for PLL lock */
+       return samsung_pll_lock_wait(pll,
+                       PLL2550XX_LOCK_STAT_MASK << PLL2550XX_LOCK_STAT_SHIFT);
 }
 
 static const struct clk_ops samsung_pll2550xx_clk_ops = {
@@ -1132,13 +1135,9 @@ static int samsung_pll2650x_set_rate(struct clk_hw *hw, unsigned long drate,
        con1 |= ((rate->kdiv & PLL2650X_K_MASK) << PLL2650X_K_SHIFT);
        writel_relaxed(con1, pll->con_reg + 4);
 
-       do {
-               cpu_relax();
-               con0 = readl_relaxed(pll->con_reg);
-       } while (!(con0 & (PLL2650X_LOCK_STAT_MASK
-                       << PLL2650X_LOCK_STAT_SHIFT)));
-
-       return 0;
+       /* Wait for PLL lock */
+       return samsung_pll_lock_wait(pll,
+                       PLL2650X_LOCK_STAT_MASK << PLL2650X_LOCK_STAT_SHIFT);
 }
 
 static const struct clk_ops samsung_pll2650x_clk_ops = {
@@ -1196,7 +1195,7 @@ static int samsung_pll2650xx_set_rate(struct clk_hw *hw, unsigned long drate,
                                        unsigned long parent_rate)
 {
        struct samsung_clk_pll *pll = to_clk_pll(hw);
-       u32 tmp, pll_con0, pll_con2;
+       u32 pll_con0, pll_con2;
        const struct samsung_pll_rate_table *rate;
 
        rate = samsung_get_pll_settings(pll, drate);
@@ -1229,11 +1228,7 @@ static int samsung_pll2650xx_set_rate(struct clk_hw *hw, unsigned long drate,
        writel_relaxed(pll_con0, pll->con_reg);
        writel_relaxed(pll_con2, pll->con_reg + 8);
 
-       do {
-               tmp = readl_relaxed(pll->con_reg);
-       } while (!(tmp & (0x1 << PLL2650XX_PLL_LOCKTIME_SHIFT)));
-
-       return 0;
+       return samsung_pll_lock_wait(pll, 0x1 << PLL2650XX_PLL_LOCKTIME_SHIFT);
 }
 
 static const struct clk_ops samsung_pll2650xx_clk_ops = {
index f3b4eb9..1c14eb2 100644 (file)
@@ -8,12 +8,12 @@ menuconfig CLK_SIFIVE
 
 if CLK_SIFIVE
 
-config CLK_SIFIVE_FU540_PRCI
-       bool "PRCI driver for SiFive FU540 SoCs"
+config CLK_SIFIVE_PRCI
+       bool "PRCI driver for SiFive SoCs"
        select CLK_ANALOGBITS_WRPLL_CLN28HPC
        help
          Supports the Power Reset Clock interface (PRCI) IP block found in
-         FU540 SoCs.  If this kernel is meant to run on a SiFive FU540 SoC,
-         enable this driver.
+         FU540/FU740 SoCs. If this kernel is meant to run on a SiFive FU540
+         or FU740 SoC, enable this driver.
 
 endif
index 0797f14..7b06fc0 100644 (file)
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_CLK_SIFIVE_FU540_PRCI)    += fu540-prci.o
+obj-$(CONFIG_CLK_SIFIVE_PRCI)  += sifive-prci.o fu540-prci.o fu740-prci.o
index a8901f9..29bab91 100644 (file)
@@ -1,17 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2018-2019 SiFive, Inc.
- * Wesley Terpstra
- * Paul Walmsley
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * Copyright (C) 2018-2019 Wesley Terpstra
+ * Copyright (C) 2018-2019 Paul Walmsley
+ * Copyright (C) 2020 Zong Li
  *
  * The FU540 PRCI implements clock and reset control for the SiFive
  * FU540-C000 chip.  This driver assumes that it has sole control
  * - SiFive FU540-C000 manual v1p0, Chapter 7 "Clocking and Reset"
  */
 
-#include <dt-bindings/clock/sifive-fu540-prci.h>
-#include <linux/clkdev.h>
-#include <linux/clk-provider.h>
-#include <linux/clk/analogbits-wrpll-cln28hpc.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_clk.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-/*
- * EXPECTED_CLK_PARENT_COUNT: how many parent clocks this driver expects:
- *     hfclk and rtcclk
- */
-#define EXPECTED_CLK_PARENT_COUNT              2
-
-/*
- * Register offsets and bitmasks
- */
-
-/* COREPLLCFG0 */
-#define PRCI_COREPLLCFG0_OFFSET                        0x4
-# define PRCI_COREPLLCFG0_DIVR_SHIFT           0
-# define PRCI_COREPLLCFG0_DIVR_MASK            (0x3f << PRCI_COREPLLCFG0_DIVR_SHIFT)
-# define PRCI_COREPLLCFG0_DIVF_SHIFT           6
-# define PRCI_COREPLLCFG0_DIVF_MASK            (0x1ff << PRCI_COREPLLCFG0_DIVF_SHIFT)
-# define PRCI_COREPLLCFG0_DIVQ_SHIFT           15
-# define PRCI_COREPLLCFG0_DIVQ_MASK            (0x7 << PRCI_COREPLLCFG0_DIVQ_SHIFT)
-# define PRCI_COREPLLCFG0_RANGE_SHIFT          18
-# define PRCI_COREPLLCFG0_RANGE_MASK           (0x7 << PRCI_COREPLLCFG0_RANGE_SHIFT)
-# define PRCI_COREPLLCFG0_BYPASS_SHIFT         24
-# define PRCI_COREPLLCFG0_BYPASS_MASK          (0x1 << PRCI_COREPLLCFG0_BYPASS_SHIFT)
-# define PRCI_COREPLLCFG0_FSE_SHIFT            25
-# define PRCI_COREPLLCFG0_FSE_MASK             (0x1 << PRCI_COREPLLCFG0_FSE_SHIFT)
-# define PRCI_COREPLLCFG0_LOCK_SHIFT           31
-# define PRCI_COREPLLCFG0_LOCK_MASK            (0x1 << PRCI_COREPLLCFG0_LOCK_SHIFT)
 
-/* DDRPLLCFG0 */
-#define PRCI_DDRPLLCFG0_OFFSET                 0xc
-# define PRCI_DDRPLLCFG0_DIVR_SHIFT            0
-# define PRCI_DDRPLLCFG0_DIVR_MASK             (0x3f << PRCI_DDRPLLCFG0_DIVR_SHIFT)
-# define PRCI_DDRPLLCFG0_DIVF_SHIFT            6
-# define PRCI_DDRPLLCFG0_DIVF_MASK             (0x1ff << PRCI_DDRPLLCFG0_DIVF_SHIFT)
-# define PRCI_DDRPLLCFG0_DIVQ_SHIFT            15
-# define PRCI_DDRPLLCFG0_DIVQ_MASK             (0x7 << PRCI_DDRPLLCFG0_DIVQ_SHIFT)
-# define PRCI_DDRPLLCFG0_RANGE_SHIFT           18
-# define PRCI_DDRPLLCFG0_RANGE_MASK            (0x7 << PRCI_DDRPLLCFG0_RANGE_SHIFT)
-# define PRCI_DDRPLLCFG0_BYPASS_SHIFT          24
-# define PRCI_DDRPLLCFG0_BYPASS_MASK           (0x1 << PRCI_DDRPLLCFG0_BYPASS_SHIFT)
-# define PRCI_DDRPLLCFG0_FSE_SHIFT             25
-# define PRCI_DDRPLLCFG0_FSE_MASK              (0x1 << PRCI_DDRPLLCFG0_FSE_SHIFT)
-# define PRCI_DDRPLLCFG0_LOCK_SHIFT            31
-# define PRCI_DDRPLLCFG0_LOCK_MASK             (0x1 << PRCI_DDRPLLCFG0_LOCK_SHIFT)
-
-/* DDRPLLCFG1 */
-#define PRCI_DDRPLLCFG1_OFFSET                 0x10
-# define PRCI_DDRPLLCFG1_CKE_SHIFT             24
-# define PRCI_DDRPLLCFG1_CKE_MASK              (0x1 << PRCI_DDRPLLCFG1_CKE_SHIFT)
-
-/* GEMGXLPLLCFG0 */
-#define PRCI_GEMGXLPLLCFG0_OFFSET              0x1c
-# define PRCI_GEMGXLPLLCFG0_DIVR_SHIFT         0
-# define PRCI_GEMGXLPLLCFG0_DIVR_MASK          (0x3f << PRCI_GEMGXLPLLCFG0_DIVR_SHIFT)
-# define PRCI_GEMGXLPLLCFG0_DIVF_SHIFT         6
-# define PRCI_GEMGXLPLLCFG0_DIVF_MASK          (0x1ff << PRCI_GEMGXLPLLCFG0_DIVF_SHIFT)
-# define PRCI_GEMGXLPLLCFG0_DIVQ_SHIFT         15
-# define PRCI_GEMGXLPLLCFG0_DIVQ_MASK          (0x7 << PRCI_GEMGXLPLLCFG0_DIVQ_SHIFT)
-# define PRCI_GEMGXLPLLCFG0_RANGE_SHIFT                18
-# define PRCI_GEMGXLPLLCFG0_RANGE_MASK         (0x7 << PRCI_GEMGXLPLLCFG0_RANGE_SHIFT)
-# define PRCI_GEMGXLPLLCFG0_BYPASS_SHIFT       24
-# define PRCI_GEMGXLPLLCFG0_BYPASS_MASK                (0x1 << PRCI_GEMGXLPLLCFG0_BYPASS_SHIFT)
-# define PRCI_GEMGXLPLLCFG0_FSE_SHIFT          25
-# define PRCI_GEMGXLPLLCFG0_FSE_MASK           (0x1 << PRCI_GEMGXLPLLCFG0_FSE_SHIFT)
-# define PRCI_GEMGXLPLLCFG0_LOCK_SHIFT         31
-# define PRCI_GEMGXLPLLCFG0_LOCK_MASK          (0x1 << PRCI_GEMGXLPLLCFG0_LOCK_SHIFT)
-
-/* GEMGXLPLLCFG1 */
-#define PRCI_GEMGXLPLLCFG1_OFFSET              0x20
-# define PRCI_GEMGXLPLLCFG1_CKE_SHIFT          24
-# define PRCI_GEMGXLPLLCFG1_CKE_MASK           (0x1 << PRCI_GEMGXLPLLCFG1_CKE_SHIFT)
-
-/* CORECLKSEL */
-#define PRCI_CORECLKSEL_OFFSET                 0x24
-# define PRCI_CORECLKSEL_CORECLKSEL_SHIFT      0
-# define PRCI_CORECLKSEL_CORECLKSEL_MASK       (0x1 << PRCI_CORECLKSEL_CORECLKSEL_SHIFT)
-
-/* DEVICESRESETREG */
-#define PRCI_DEVICESRESETREG_OFFSET                    0x28
-# define PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_SHIFT     0
-# define PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_MASK      (0x1 << PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_SHIFT)
-# define PRCI_DEVICESRESETREG_DDR_AXI_RST_N_SHIFT      1
-# define PRCI_DEVICESRESETREG_DDR_AXI_RST_N_MASK       (0x1 << PRCI_DEVICESRESETREG_DDR_AXI_RST_N_SHIFT)
-# define PRCI_DEVICESRESETREG_DDR_AHB_RST_N_SHIFT      2
-# define PRCI_DEVICESRESETREG_DDR_AHB_RST_N_MASK       (0x1 << PRCI_DEVICESRESETREG_DDR_AHB_RST_N_SHIFT)
-# define PRCI_DEVICESRESETREG_DDR_PHY_RST_N_SHIFT      3
-# define PRCI_DEVICESRESETREG_DDR_PHY_RST_N_MASK       (0x1 << PRCI_DEVICESRESETREG_DDR_PHY_RST_N_SHIFT)
-# define PRCI_DEVICESRESETREG_GEMGXL_RST_N_SHIFT       5
-# define PRCI_DEVICESRESETREG_GEMGXL_RST_N_MASK                (0x1 << PRCI_DEVICESRESETREG_GEMGXL_RST_N_SHIFT)
+#include <dt-bindings/clock/sifive-fu540-prci.h>
 
-/* CLKMUXSTATUSREG */
-#define PRCI_CLKMUXSTATUSREG_OFFSET                    0x2c
-# define PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_SHIFT    1
-# define PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_MASK     (0x1 << PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_SHIFT)
+#include "fu540-prci.h"
+#include "sifive-prci.h"
 
-/*
- * Private structures
- */
+/* PRCI integration data for each WRPLL instance */
 
-/**
- * struct __prci_data - per-device-instance data
- * @va: base virtual address of the PRCI IP block
- * @hw_clks: encapsulates struct clk_hw records
- *
- * PRCI per-device instance data
- */
-struct __prci_data {
-       void __iomem *va;
-       struct clk_hw_onecell_data hw_clks;
+static struct __prci_wrpll_data __prci_corepll_data = {
+       .cfg0_offs = PRCI_COREPLLCFG0_OFFSET,
+       .cfg1_offs = PRCI_COREPLLCFG1_OFFSET,
+       .enable_bypass = sifive_prci_coreclksel_use_hfclk,
+       .disable_bypass = sifive_prci_coreclksel_use_corepll,
 };
 
-/**
- * struct __prci_wrpll_data - WRPLL configuration and integration data
- * @c: WRPLL current configuration record
- * @enable_bypass: fn ptr to code to bypass the WRPLL (if applicable; else NULL)
- * @disable_bypass: fn ptr to code to not bypass the WRPLL (or NULL)
- * @cfg0_offs: WRPLL CFG0 register offset (in bytes) from the PRCI base address
- *
- * @enable_bypass and @disable_bypass are used for WRPLL instances
- * that contain a separate external glitchless clock mux downstream
- * from the PLL.  The WRPLL internal bypass mux is not glitchless.
- */
-struct __prci_wrpll_data {
-       struct wrpll_cfg c;
-       void (*enable_bypass)(struct __prci_data *pd);
-       void (*disable_bypass)(struct __prci_data *pd);
-       u8 cfg0_offs;
+static struct __prci_wrpll_data __prci_ddrpll_data = {
+       .cfg0_offs = PRCI_DDRPLLCFG0_OFFSET,
+       .cfg1_offs = PRCI_DDRPLLCFG1_OFFSET,
 };
 
-/**
- * struct __prci_clock - describes a clock device managed by PRCI
- * @name: user-readable clock name string - should match the manual
- * @parent_name: parent name for this clock
- * @ops: struct clk_ops for the Linux clock framework to use for control
- * @hw: Linux-private clock data
- * @pwd: WRPLL-specific data, associated with this clock (if not NULL)
- * @pd: PRCI-specific data associated with this clock (if not NULL)
- *
- * PRCI clock data.  Used by the PRCI driver to register PRCI-provided
- * clocks to the Linux clock infrastructure.
- */
-struct __prci_clock {
-       const char *name;
-       const char *parent_name;
-       const struct clk_ops *ops;
-       struct clk_hw hw;
-       struct __prci_wrpll_data *pwd;
-       struct __prci_data *pd;
+static struct __prci_wrpll_data __prci_gemgxlpll_data = {
+       .cfg0_offs = PRCI_GEMGXLPLLCFG0_OFFSET,
+       .cfg1_offs = PRCI_GEMGXLPLLCFG1_OFFSET,
 };
 
-#define clk_hw_to_prci_clock(pwd) container_of(pwd, struct __prci_clock, hw)
-
-/*
- * Private functions
- */
-
-/**
- * __prci_readl() - read from a PRCI register
- * @pd: PRCI context
- * @offs: register offset to read from (in bytes, from PRCI base address)
- *
- * Read the register located at offset @offs from the base virtual
- * address of the PRCI register target described by @pd, and return
- * the value to the caller.
- *
- * Context: Any context.
- *
- * Return: the contents of the register described by @pd and @offs.
- */
-static u32 __prci_readl(struct __prci_data *pd, u32 offs)
-{
-       return readl_relaxed(pd->va + offs);
-}
-
-static void __prci_writel(u32 v, u32 offs, struct __prci_data *pd)
-{
-       writel_relaxed(v, pd->va + offs);
-}
-
-/* WRPLL-related private functions */
-
-/**
- * __prci_wrpll_unpack() - unpack WRPLL configuration registers into parameters
- * @c: ptr to a struct wrpll_cfg record to write config into
- * @r: value read from the PRCI PLL configuration register
- *
- * Given a value @r read from an FU540 PRCI PLL configuration register,
- * split it into fields and populate it into the WRPLL configuration record
- * pointed to by @c.
- *
- * The COREPLLCFG0 macros are used below, but the other *PLLCFG0 macros
- * have the same register layout.
- *
- * Context: Any context.
- */
-static void __prci_wrpll_unpack(struct wrpll_cfg *c, u32 r)
-{
-       u32 v;
-
-       v = r & PRCI_COREPLLCFG0_DIVR_MASK;
-       v >>= PRCI_COREPLLCFG0_DIVR_SHIFT;
-       c->divr = v;
-
-       v = r & PRCI_COREPLLCFG0_DIVF_MASK;
-       v >>= PRCI_COREPLLCFG0_DIVF_SHIFT;
-       c->divf = v;
-
-       v = r & PRCI_COREPLLCFG0_DIVQ_MASK;
-       v >>= PRCI_COREPLLCFG0_DIVQ_SHIFT;
-       c->divq = v;
-
-       v = r & PRCI_COREPLLCFG0_RANGE_MASK;
-       v >>= PRCI_COREPLLCFG0_RANGE_SHIFT;
-       c->range = v;
-
-       c->flags &= (WRPLL_FLAGS_INT_FEEDBACK_MASK |
-                    WRPLL_FLAGS_EXT_FEEDBACK_MASK);
-
-       /* external feedback mode not supported */
-       c->flags |= WRPLL_FLAGS_INT_FEEDBACK_MASK;
-}
-
-/**
- * __prci_wrpll_pack() - pack PLL configuration parameters into a register value
- * @c: pointer to a struct wrpll_cfg record containing the PLL's cfg
- *
- * Using a set of WRPLL configuration values pointed to by @c,
- * assemble a PRCI PLL configuration register value, and return it to
- * the caller.
- *
- * Context: Any context.  Caller must ensure that the contents of the
- *          record pointed to by @c do not change during the execution
- *          of this function.
- *
- * Returns: a value suitable for writing into a PRCI PLL configuration
- *          register
- */
-static u32 __prci_wrpll_pack(const struct wrpll_cfg *c)
-{
-       u32 r = 0;
-
-       r |= c->divr << PRCI_COREPLLCFG0_DIVR_SHIFT;
-       r |= c->divf << PRCI_COREPLLCFG0_DIVF_SHIFT;
-       r |= c->divq << PRCI_COREPLLCFG0_DIVQ_SHIFT;
-       r |= c->range << PRCI_COREPLLCFG0_RANGE_SHIFT;
-
-       /* external feedback mode not supported */
-       r |= PRCI_COREPLLCFG0_FSE_MASK;
-
-       return r;
-}
-
-/**
- * __prci_wrpll_read_cfg() - read the WRPLL configuration from the PRCI
- * @pd: PRCI context
- * @pwd: PRCI WRPLL metadata
- *
- * Read the current configuration of the PLL identified by @pwd from
- * the PRCI identified by @pd, and store it into the local configuration
- * cache in @pwd.
- *
- * Context: Any context.  Caller must prevent the records pointed to by
- *          @pd and @pwd from changing during execution.
- */
-static void __prci_wrpll_read_cfg(struct __prci_data *pd,
-                                 struct __prci_wrpll_data *pwd)
-{
-       __prci_wrpll_unpack(&pwd->c, __prci_readl(pd, pwd->cfg0_offs));
-}
-
-/**
- * __prci_wrpll_write_cfg() - write WRPLL configuration into the PRCI
- * @pd: PRCI context
- * @pwd: PRCI WRPLL metadata
- * @c: WRPLL configuration record to write
- *
- * Write the WRPLL configuration described by @c into the WRPLL
- * configuration register identified by @pwd in the PRCI instance
- * described by @c.  Make a cached copy of the WRPLL's current
- * configuration so it can be used by other code.
- *
- * Context: Any context.  Caller must prevent the records pointed to by
- *          @pd and @pwd from changing during execution.
- */
-static void __prci_wrpll_write_cfg(struct __prci_data *pd,
-                                  struct __prci_wrpll_data *pwd,
-                                  struct wrpll_cfg *c)
-{
-       __prci_writel(__prci_wrpll_pack(c), pwd->cfg0_offs, pd);
-
-       memcpy(&pwd->c, c, sizeof(*c));
-}
-
-/* Core clock mux control */
-
-/**
- * __prci_coreclksel_use_hfclk() - switch the CORECLK mux to output HFCLK
- * @pd: struct __prci_data * for the PRCI containing the CORECLK mux reg
- *
- * Switch the CORECLK mux to the HFCLK input source; return once complete.
- *
- * Context: Any context.  Caller must prevent concurrent changes to the
- *          PRCI_CORECLKSEL_OFFSET register.
- */
-static void __prci_coreclksel_use_hfclk(struct __prci_data *pd)
-{
-       u32 r;
-
-       r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET);
-       r |= PRCI_CORECLKSEL_CORECLKSEL_MASK;
-       __prci_writel(r, PRCI_CORECLKSEL_OFFSET, pd);
-
-       r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); /* barrier */
-}
-
-/**
- * __prci_coreclksel_use_corepll() - switch the CORECLK mux to output COREPLL
- * @pd: struct __prci_data * for the PRCI containing the CORECLK mux reg
- *
- * Switch the CORECLK mux to the PLL output clock; return once complete.
- *
- * Context: Any context.  Caller must prevent concurrent changes to the
- *          PRCI_CORECLKSEL_OFFSET register.
- */
-static void __prci_coreclksel_use_corepll(struct __prci_data *pd)
-{
-       u32 r;
-
-       r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET);
-       r &= ~PRCI_CORECLKSEL_CORECLKSEL_MASK;
-       __prci_writel(r, PRCI_CORECLKSEL_OFFSET, pd);
-
-       r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET); /* barrier */
-}
-
-/*
- * Linux clock framework integration
- *
- * See the Linux clock framework documentation for more information on
- * these functions.
- */
-
-static unsigned long sifive_fu540_prci_wrpll_recalc_rate(struct clk_hw *hw,
-                                                        unsigned long parent_rate)
-{
-       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
-       struct __prci_wrpll_data *pwd = pc->pwd;
-
-       return wrpll_calc_output_rate(&pwd->c, parent_rate);
-}
-
-static long sifive_fu540_prci_wrpll_round_rate(struct clk_hw *hw,
-                                              unsigned long rate,
-                                              unsigned long *parent_rate)
-{
-       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
-       struct __prci_wrpll_data *pwd = pc->pwd;
-       struct wrpll_cfg c;
-
-       memcpy(&c, &pwd->c, sizeof(c));
-
-       wrpll_configure_for_rate(&c, rate, *parent_rate);
-
-       return wrpll_calc_output_rate(&c, *parent_rate);
-}
-
-static int sifive_fu540_prci_wrpll_set_rate(struct clk_hw *hw,
-                                           unsigned long rate,
-                                           unsigned long parent_rate)
-{
-       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
-       struct __prci_wrpll_data *pwd = pc->pwd;
-       struct __prci_data *pd = pc->pd;
-       int r;
-
-       r = wrpll_configure_for_rate(&pwd->c, rate, parent_rate);
-       if (r)
-               return r;
-
-       if (pwd->enable_bypass)
-               pwd->enable_bypass(pd);
-
-       __prci_wrpll_write_cfg(pd, pwd, &pwd->c);
-
-       udelay(wrpll_calc_max_lock_us(&pwd->c));
-
-       if (pwd->disable_bypass)
-               pwd->disable_bypass(pd);
-
-       return 0;
-}
+/* Linux clock framework integration */
 
 static const struct clk_ops sifive_fu540_prci_wrpll_clk_ops = {
-       .set_rate = sifive_fu540_prci_wrpll_set_rate,
-       .round_rate = sifive_fu540_prci_wrpll_round_rate,
-       .recalc_rate = sifive_fu540_prci_wrpll_recalc_rate,
+       .set_rate = sifive_prci_wrpll_set_rate,
+       .round_rate = sifive_prci_wrpll_round_rate,
+       .recalc_rate = sifive_prci_wrpll_recalc_rate,
+       .enable = sifive_prci_clock_enable,
+       .disable = sifive_prci_clock_disable,
+       .is_enabled = sifive_clk_is_enabled,
 };
 
 static const struct clk_ops sifive_fu540_prci_wrpll_ro_clk_ops = {
-       .recalc_rate = sifive_fu540_prci_wrpll_recalc_rate,
+       .recalc_rate = sifive_prci_wrpll_recalc_rate,
 };
 
-/* TLCLKSEL clock integration */
-
-static unsigned long sifive_fu540_prci_tlclksel_recalc_rate(struct clk_hw *hw,
-                                                           unsigned long parent_rate)
-{
-       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
-       struct __prci_data *pd = pc->pd;
-       u32 v;
-       u8 div;
-
-       v = __prci_readl(pd, PRCI_CLKMUXSTATUSREG_OFFSET);
-       v &= PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_MASK;
-       div = v ? 1 : 2;
-
-       return div_u64(parent_rate, div);
-}
-
 static const struct clk_ops sifive_fu540_prci_tlclksel_clk_ops = {
-       .recalc_rate = sifive_fu540_prci_tlclksel_recalc_rate,
-};
-
-/*
- * PRCI integration data for each WRPLL instance
- */
-
-static struct __prci_wrpll_data __prci_corepll_data = {
-       .cfg0_offs = PRCI_COREPLLCFG0_OFFSET,
-       .enable_bypass = __prci_coreclksel_use_hfclk,
-       .disable_bypass = __prci_coreclksel_use_corepll,
-};
-
-static struct __prci_wrpll_data __prci_ddrpll_data = {
-       .cfg0_offs = PRCI_DDRPLLCFG0_OFFSET,
+       .recalc_rate = sifive_prci_tlclksel_recalc_rate,
 };
 
-static struct __prci_wrpll_data __prci_gemgxlpll_data = {
-       .cfg0_offs = PRCI_GEMGXLPLLCFG0_OFFSET,
-};
-
-/*
- * List of clock controls provided by the PRCI
- */
-
-static struct __prci_clock __prci_init_clocks[] = {
+/* List of clock controls provided by the PRCI */
+struct __prci_clock __prci_init_clocks_fu540[] = {
        [PRCI_CLK_COREPLL] = {
                .name = "corepll",
                .parent_name = "hfclk",
@@ -506,125 +87,3 @@ static struct __prci_clock __prci_init_clocks[] = {
                .ops = &sifive_fu540_prci_tlclksel_clk_ops,
        },
 };
-
-/**
- * __prci_register_clocks() - register clock controls in the PRCI with Linux
- * @dev: Linux struct device *
- *
- * Register the list of clock controls described in __prci_init_plls[] with
- * the Linux clock framework.
- *
- * Return: 0 upon success or a negative error code upon failure.
- */
-static int __prci_register_clocks(struct device *dev, struct __prci_data *pd)
-{
-       struct clk_init_data init = { };
-       struct __prci_clock *pic;
-       int parent_count, i, r;
-
-       parent_count = of_clk_get_parent_count(dev->of_node);
-       if (parent_count != EXPECTED_CLK_PARENT_COUNT) {
-               dev_err(dev, "expected only two parent clocks, found %d\n",
-                       parent_count);
-               return -EINVAL;
-       }
-
-       /* Register PLLs */
-       for (i = 0; i < ARRAY_SIZE(__prci_init_clocks); ++i) {
-               pic = &__prci_init_clocks[i];
-
-               init.name = pic->name;
-               init.parent_names = &pic->parent_name;
-               init.num_parents = 1;
-               init.ops = pic->ops;
-               pic->hw.init = &init;
-
-               pic->pd = pd;
-
-               if (pic->pwd)
-                       __prci_wrpll_read_cfg(pd, pic->pwd);
-
-               r = devm_clk_hw_register(dev, &pic->hw);
-               if (r) {
-                       dev_warn(dev, "Failed to register clock %s: %d\n",
-                                init.name, r);
-                       return r;
-               }
-
-               r = clk_hw_register_clkdev(&pic->hw, pic->name, dev_name(dev));
-               if (r) {
-                       dev_warn(dev, "Failed to register clkdev for %s: %d\n",
-                                init.name, r);
-                       return r;
-               }
-
-               pd->hw_clks.hws[i] = &pic->hw;
-       }
-
-       pd->hw_clks.num = i;
-
-       r = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
-                                       &pd->hw_clks);
-       if (r) {
-               dev_err(dev, "could not add hw_provider: %d\n", r);
-               return r;
-       }
-
-       return 0;
-}
-
-/*
- * Linux device model integration
- *
- * See the Linux device model documentation for more information about
- * these functions.
- */
-static int sifive_fu540_prci_probe(struct platform_device *pdev)
-{
-       struct device *dev = &pdev->dev;
-       struct resource *res;
-       struct __prci_data *pd;
-       int r;
-
-       pd = devm_kzalloc(dev,
-                         struct_size(pd, hw_clks.hws,
-                                     ARRAY_SIZE(__prci_init_clocks)),
-                         GFP_KERNEL);
-       if (!pd)
-               return -ENOMEM;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pd->va = devm_ioremap_resource(dev, res);
-       if (IS_ERR(pd->va))
-               return PTR_ERR(pd->va);
-
-       r = __prci_register_clocks(dev, pd);
-       if (r) {
-               dev_err(dev, "could not register clocks: %d\n", r);
-               return r;
-       }
-
-       dev_dbg(dev, "SiFive FU540 PRCI probed\n");
-
-       return 0;
-}
-
-static const struct of_device_id sifive_fu540_prci_of_match[] = {
-       { .compatible = "sifive,fu540-c000-prci", },
-       {}
-};
-MODULE_DEVICE_TABLE(of, sifive_fu540_prci_of_match);
-
-static struct platform_driver sifive_fu540_prci_driver = {
-       .driver = {
-               .name = "sifive-fu540-prci",
-               .of_match_table = sifive_fu540_prci_of_match,
-       },
-       .probe = sifive_fu540_prci_probe,
-};
-
-static int __init sifive_fu540_prci_init(void)
-{
-       return platform_driver_register(&sifive_fu540_prci_driver);
-}
-core_initcall(sifive_fu540_prci_init);
diff --git a/drivers/clk/sifive/fu540-prci.h b/drivers/clk/sifive/fu540-prci.h
new file mode 100644 (file)
index 0000000..c8271ef
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 SiFive, Inc.
+ * Zong Li
+ */
+
+#ifndef __SIFIVE_CLK_FU540_PRCI_H
+#define __SIFIVE_CLK_FU540_PRCI_H
+
+#include "sifive-prci.h"
+
+#define NUM_CLOCK_FU540        4
+
+extern struct __prci_clock __prci_init_clocks_fu540[NUM_CLOCK_FU540];
+
+static const struct prci_clk_desc prci_clk_fu540 = {
+       .clks = __prci_init_clocks_fu540,
+       .num_clks = ARRAY_SIZE(__prci_init_clocks_fu540),
+};
+
+#endif /* __SIFIVE_CLK_FU540_PRCI_H */
diff --git a/drivers/clk/sifive/fu740-prci.c b/drivers/clk/sifive/fu740-prci.c
new file mode 100644 (file)
index 0000000..764d109
--- /dev/null
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 SiFive, Inc.
+ * Copyright (C) 2020 Zong Li
+ */
+
+#include <linux/module.h>
+
+#include <dt-bindings/clock/sifive-fu740-prci.h>
+
+#include "fu540-prci.h"
+#include "sifive-prci.h"
+
+/* PRCI integration data for each WRPLL instance */
+
+static struct __prci_wrpll_data __prci_corepll_data = {
+       .cfg0_offs = PRCI_COREPLLCFG0_OFFSET,
+       .cfg1_offs = PRCI_COREPLLCFG1_OFFSET,
+       .enable_bypass = sifive_prci_coreclksel_use_hfclk,
+       .disable_bypass = sifive_prci_coreclksel_use_final_corepll,
+};
+
+static struct __prci_wrpll_data __prci_ddrpll_data = {
+       .cfg0_offs = PRCI_DDRPLLCFG0_OFFSET,
+       .cfg1_offs = PRCI_DDRPLLCFG1_OFFSET,
+};
+
+static struct __prci_wrpll_data __prci_gemgxlpll_data = {
+       .cfg0_offs = PRCI_GEMGXLPLLCFG0_OFFSET,
+       .cfg1_offs = PRCI_GEMGXLPLLCFG1_OFFSET,
+};
+
+static struct __prci_wrpll_data __prci_dvfscorepll_data = {
+       .cfg0_offs = PRCI_DVFSCOREPLLCFG0_OFFSET,
+       .cfg1_offs = PRCI_DVFSCOREPLLCFG1_OFFSET,
+       .enable_bypass = sifive_prci_corepllsel_use_corepll,
+       .disable_bypass = sifive_prci_corepllsel_use_dvfscorepll,
+};
+
+static struct __prci_wrpll_data __prci_hfpclkpll_data = {
+       .cfg0_offs = PRCI_HFPCLKPLLCFG0_OFFSET,
+       .cfg1_offs = PRCI_HFPCLKPLLCFG1_OFFSET,
+       .enable_bypass = sifive_prci_hfpclkpllsel_use_hfclk,
+       .disable_bypass = sifive_prci_hfpclkpllsel_use_hfpclkpll,
+};
+
+static struct __prci_wrpll_data __prci_cltxpll_data = {
+       .cfg0_offs = PRCI_CLTXPLLCFG0_OFFSET,
+       .cfg1_offs = PRCI_CLTXPLLCFG1_OFFSET,
+};
+
+/* Linux clock framework integration */
+
+static const struct clk_ops sifive_fu740_prci_wrpll_clk_ops = {
+       .set_rate = sifive_prci_wrpll_set_rate,
+       .round_rate = sifive_prci_wrpll_round_rate,
+       .recalc_rate = sifive_prci_wrpll_recalc_rate,
+       .enable = sifive_prci_clock_enable,
+       .disable = sifive_prci_clock_disable,
+       .is_enabled = sifive_clk_is_enabled,
+};
+
+static const struct clk_ops sifive_fu740_prci_wrpll_ro_clk_ops = {
+       .recalc_rate = sifive_prci_wrpll_recalc_rate,
+};
+
+static const struct clk_ops sifive_fu740_prci_tlclksel_clk_ops = {
+       .recalc_rate = sifive_prci_tlclksel_recalc_rate,
+};
+
+static const struct clk_ops sifive_fu740_prci_hfpclkplldiv_clk_ops = {
+       .recalc_rate = sifive_prci_hfpclkplldiv_recalc_rate,
+};
+
+/* List of clock controls provided by the PRCI */
+struct __prci_clock __prci_init_clocks_fu740[] = {
+       [PRCI_CLK_COREPLL] = {
+               .name = "corepll",
+               .parent_name = "hfclk",
+               .ops = &sifive_fu740_prci_wrpll_clk_ops,
+               .pwd = &__prci_corepll_data,
+       },
+       [PRCI_CLK_DDRPLL] = {
+               .name = "ddrpll",
+               .parent_name = "hfclk",
+               .ops = &sifive_fu740_prci_wrpll_ro_clk_ops,
+               .pwd = &__prci_ddrpll_data,
+       },
+       [PRCI_CLK_GEMGXLPLL] = {
+               .name = "gemgxlpll",
+               .parent_name = "hfclk",
+               .ops = &sifive_fu740_prci_wrpll_clk_ops,
+               .pwd = &__prci_gemgxlpll_data,
+       },
+       [PRCI_CLK_DVFSCOREPLL] = {
+               .name = "dvfscorepll",
+               .parent_name = "hfclk",
+               .ops = &sifive_fu740_prci_wrpll_clk_ops,
+               .pwd = &__prci_dvfscorepll_data,
+       },
+       [PRCI_CLK_HFPCLKPLL] = {
+               .name = "hfpclkpll",
+               .parent_name = "hfclk",
+               .ops = &sifive_fu740_prci_wrpll_clk_ops,
+               .pwd = &__prci_hfpclkpll_data,
+       },
+       [PRCI_CLK_CLTXPLL] = {
+               .name = "cltxpll",
+               .parent_name = "hfclk",
+               .ops = &sifive_fu740_prci_wrpll_clk_ops,
+               .pwd = &__prci_cltxpll_data,
+       },
+       [PRCI_CLK_TLCLK] = {
+               .name = "tlclk",
+               .parent_name = "corepll",
+               .ops = &sifive_fu740_prci_tlclksel_clk_ops,
+       },
+       [PRCI_CLK_PCLK] = {
+               .name = "pclk",
+               .parent_name = "hfpclkpll",
+               .ops = &sifive_fu740_prci_hfpclkplldiv_clk_ops,
+       },
+};
diff --git a/drivers/clk/sifive/fu740-prci.h b/drivers/clk/sifive/fu740-prci.h
new file mode 100644 (file)
index 0000000..13ef971
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 SiFive, Inc.
+ * Zong Li
+ */
+
+#ifndef __SIFIVE_CLK_FU740_PRCI_H
+#define __SIFIVE_CLK_FU740_PRCI_H
+
+#include "sifive-prci.h"
+
+#define NUM_CLOCK_FU740        8
+
+extern struct __prci_clock __prci_init_clocks_fu740[NUM_CLOCK_FU740];
+
+static const struct prci_clk_desc prci_clk_fu740 = {
+       .clks = __prci_init_clocks_fu740,
+       .num_clks = ARRAY_SIZE(__prci_init_clocks_fu740),
+};
+
+#endif /* __SIFIVE_CLK_FU740_PRCI_H */
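
The per-SoC descriptor tables defined in the two headers above are meant to
be selected at probe time. A plausible sketch of that selection (an
illustration, not the driver's exact probe code, which this diff does not
show) hangs each descriptor off the OF match table and retrieves it with
of_device_get_match_data():

	#include <linux/of_device.h>
	#include <linux/platform_device.h>

	static const struct of_device_id sifive_prci_of_match[] = {
		{ .compatible = "sifive,fu540-c000-prci", .data = &prci_clk_fu540 },
		{ .compatible = "sifive,fu740-c000-prci", .data = &prci_clk_fu740 },
		{}
	};

	static int sifive_prci_probe(struct platform_device *pdev)
	{
		const struct prci_clk_desc *desc;

		desc = of_device_get_match_data(&pdev->dev);
		if (!desc)
			return -ENODEV;

		/* register desc->clks[0 .. desc->num_clks - 1] here */
		return 0;
	}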
diff --git a/drivers/clk/sifive/sifive-prci.c b/drivers/clk/sifive/sifive-prci.c
new file mode 100644 (file)
index 0000000..c78b042
--- /dev/null
@@ -0,0 +1,574 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 SiFive, Inc.
+ * Copyright (C) 2020 Zong Li
+ */
+
+#include <linux/clkdev.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/of_device.h>
+#include "sifive-prci.h"
+#include "fu540-prci.h"
+#include "fu740-prci.h"
+
+/*
+ * Private functions
+ */
+
+/**
+ * __prci_readl() - read from a PRCI register
+ * @pd: PRCI context
+ * @offs: register offset to read from (in bytes, from PRCI base address)
+ *
+ * Read the register located at offset @offs from the base virtual
+ * address of the PRCI register target described by @pd, and return
+ * the value to the caller.
+ *
+ * Context: Any context.
+ *
+ * Return: the contents of the register described by @pd and @offs.
+ */
+static u32 __prci_readl(struct __prci_data *pd, u32 offs)
+{
+       return readl_relaxed(pd->va + offs);
+}
+
+static void __prci_writel(u32 v, u32 offs, struct __prci_data *pd)
+{
+       writel_relaxed(v, pd->va + offs);
+}
+
+/* WRPLL-related private functions */
+
+/**
+ * __prci_wrpll_unpack() - unpack WRPLL configuration registers into parameters
+ * @c: ptr to a struct wrpll_cfg record to write config into
+ * @r: value read from the PRCI PLL configuration register
+ *
+ * Given a value @r read from an FU740 PRCI PLL configuration register,
+ * split it into fields and use them to populate the WRPLL configuration
+ * record pointed to by @c.
+ *
+ * The COREPLLCFG0 macros are used below, but the other *PLLCFG0 macros
+ * have the same register layout.
+ *
+ * Context: Any context.
+ */
+static void __prci_wrpll_unpack(struct wrpll_cfg *c, u32 r)
+{
+       u32 v;
+
+       v = r & PRCI_COREPLLCFG0_DIVR_MASK;
+       v >>= PRCI_COREPLLCFG0_DIVR_SHIFT;
+       c->divr = v;
+
+       v = r & PRCI_COREPLLCFG0_DIVF_MASK;
+       v >>= PRCI_COREPLLCFG0_DIVF_SHIFT;
+       c->divf = v;
+
+       v = r & PRCI_COREPLLCFG0_DIVQ_MASK;
+       v >>= PRCI_COREPLLCFG0_DIVQ_SHIFT;
+       c->divq = v;
+
+       v = r & PRCI_COREPLLCFG0_RANGE_MASK;
+       v >>= PRCI_COREPLLCFG0_RANGE_SHIFT;
+       c->range = v;
+
+       c->flags &=
+           (WRPLL_FLAGS_INT_FEEDBACK_MASK | WRPLL_FLAGS_EXT_FEEDBACK_MASK);
+
+       /* external feedback mode not supported */
+       c->flags |= WRPLL_FLAGS_INT_FEEDBACK_MASK;
+}
+
+/**
+ * __prci_wrpll_pack() - pack PLL configuration parameters into a register value
+ * @c: pointer to a struct wrpll_cfg record containing the PLL's cfg
+ *
+ * Using a set of WRPLL configuration values pointed to by @c,
+ * assemble a PRCI PLL configuration register value, and return it to
+ * the caller.
+ *
+ * Context: Any context.  Caller must ensure that the contents of the
+ *          record pointed to by @c do not change during the execution
+ *          of this function.
+ *
+ * Returns: a value suitable for writing into a PRCI PLL configuration
+ *          register
+ */
+static u32 __prci_wrpll_pack(const struct wrpll_cfg *c)
+{
+       u32 r = 0;
+
+       r |= c->divr << PRCI_COREPLLCFG0_DIVR_SHIFT;
+       r |= c->divf << PRCI_COREPLLCFG0_DIVF_SHIFT;
+       r |= c->divq << PRCI_COREPLLCFG0_DIVQ_SHIFT;
+       r |= c->range << PRCI_COREPLLCFG0_RANGE_SHIFT;
+
+       /* external feedback mode not supported */
+       r |= PRCI_COREPLLCFG0_FSE_MASK;
+
+       return r;
+}
+
+/**
+ * __prci_wrpll_read_cfg0() - read the WRPLL configuration from the PRCI
+ * @pd: PRCI context
+ * @pwd: PRCI WRPLL metadata
+ *
+ * Read the current configuration of the PLL identified by @pwd from
+ * the PRCI identified by @pd, and store it into the local configuration
+ * cache in @pwd.
+ *
+ * Context: Any context.  Caller must prevent the records pointed to by
+ *          @pd and @pwd from changing during execution.
+ */
+static void __prci_wrpll_read_cfg0(struct __prci_data *pd,
+                                  struct __prci_wrpll_data *pwd)
+{
+       __prci_wrpll_unpack(&pwd->c, __prci_readl(pd, pwd->cfg0_offs));
+}
+
+/**
+ * __prci_wrpll_write_cfg0() - write WRPLL configuration into the PRCI
+ * @pd: PRCI context
+ * @pwd: PRCI WRPLL metadata
+ * @c: WRPLL configuration record to write
+ *
+ * Write the WRPLL configuration described by @c into the WRPLL
+ * configuration register identified by @pwd in the PRCI instance
+ * described by @pd.  Make a cached copy of the WRPLL's current
+ * configuration so it can be used by other code.
+ *
+ * Context: Any context.  Caller must prevent the records pointed to by
+ *          @pd and @pwd from changing during execution.
+ */
+static void __prci_wrpll_write_cfg0(struct __prci_data *pd,
+                                   struct __prci_wrpll_data *pwd,
+                                   struct wrpll_cfg *c)
+{
+       __prci_writel(__prci_wrpll_pack(c), pwd->cfg0_offs, pd);
+
+       memcpy(&pwd->c, c, sizeof(*c));
+}
+
+/**
+ * __prci_wrpll_write_cfg1() - write the clock enable/disable configuration
+ * into the PRCI
+ * @pd: PRCI context
+ * @pwd: PRCI WRPLL metadata
+ * @enable: clock enable or disable value
+ */
+static void __prci_wrpll_write_cfg1(struct __prci_data *pd,
+                                   struct __prci_wrpll_data *pwd,
+                                   u32 enable)
+{
+       __prci_writel(enable, pwd->cfg1_offs, pd);
+}
+
+/*
+ * Linux clock framework integration
+ *
+ * See the Linux clock framework documentation for more information on
+ * these functions.
+ */
+
+unsigned long sifive_prci_wrpll_recalc_rate(struct clk_hw *hw,
+                                           unsigned long parent_rate)
+{
+       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+       struct __prci_wrpll_data *pwd = pc->pwd;
+
+       return wrpll_calc_output_rate(&pwd->c, parent_rate);
+}
+
+long sifive_prci_wrpll_round_rate(struct clk_hw *hw,
+                                 unsigned long rate,
+                                 unsigned long *parent_rate)
+{
+       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+       struct __prci_wrpll_data *pwd = pc->pwd;
+       struct wrpll_cfg c;
+
+       memcpy(&c, &pwd->c, sizeof(c));
+
+       wrpll_configure_for_rate(&c, rate, *parent_rate);
+
+       return wrpll_calc_output_rate(&c, *parent_rate);
+}
+
+int sifive_prci_wrpll_set_rate(struct clk_hw *hw,
+                              unsigned long rate, unsigned long parent_rate)
+{
+       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+       struct __prci_wrpll_data *pwd = pc->pwd;
+       struct __prci_data *pd = pc->pd;
+       int r;
+
+       r = wrpll_configure_for_rate(&pwd->c, rate, parent_rate);
+       if (r)
+               return r;
+
+       if (pwd->enable_bypass)
+               pwd->enable_bypass(pd);
+
+       __prci_wrpll_write_cfg0(pd, pwd, &pwd->c);
+
+       udelay(wrpll_calc_max_lock_us(&pwd->c));
+
+       return 0;
+}
+
+int sifive_clk_is_enabled(struct clk_hw *hw)
+{
+       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+       struct __prci_wrpll_data *pwd = pc->pwd;
+       struct __prci_data *pd = pc->pd;
+       u32 r;
+
+       r = __prci_readl(pd, pwd->cfg1_offs);
+
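+       /*
+        * The CKE (clock enable) bit occupies the same position in every
+        * *PLLCFG1 register, so the COREPLL mask is reused for all PLLs here.
+        */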
+       if (r & PRCI_COREPLLCFG1_CKE_MASK)
+               return 1;
+       else
+               return 0;
+}
+
+int sifive_prci_clock_enable(struct clk_hw *hw)
+{
+       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+       struct __prci_wrpll_data *pwd = pc->pwd;
+       struct __prci_data *pd = pc->pd;
+
+       if (sifive_clk_is_enabled(hw))
+               return 0;
+
+       __prci_wrpll_write_cfg1(pd, pwd, PRCI_COREPLLCFG1_CKE_MASK);
+
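+       /* Route consumers back to the PLL only after its output is enabled. */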
+       if (pwd->disable_bypass)
+               pwd->disable_bypass(pd);
+
+       return 0;
+}
+
+void sifive_prci_clock_disable(struct clk_hw *hw)
+{
+       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+       struct __prci_wrpll_data *pwd = pc->pwd;
+       struct __prci_data *pd = pc->pd;
+       u32 r;
+
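+       /* Switch consumers to the bypass source before gating the PLL off. */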
+       if (pwd->enable_bypass)
+               pwd->enable_bypass(pd);
+
+       r = __prci_readl(pd, pwd->cfg1_offs);
+       r &= ~PRCI_COREPLLCFG1_CKE_MASK;
+
+       __prci_wrpll_write_cfg1(pd, pwd, r);
+}
+
+/* TLCLKSEL clock integration */
+
+unsigned long sifive_prci_tlclksel_recalc_rate(struct clk_hw *hw,
+                                              unsigned long parent_rate)
+{
+       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+       struct __prci_data *pd = pc->pd;
+       u32 v;
+       u8 div;
+
+       v = __prci_readl(pd, PRCI_CLKMUXSTATUSREG_OFFSET);
+       v &= PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_MASK;
+       div = v ? 1 : 2;
+
+       return div_u64(parent_rate, div);
+}
+
+/* HFPCLK clock integration */
+
+unsigned long sifive_prci_hfpclkplldiv_recalc_rate(struct clk_hw *hw,
+                                                  unsigned long parent_rate)
+{
+       struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+       struct __prci_data *pd = pc->pd;
+       u32 div = __prci_readl(pd, PRCI_HFPCLKPLLDIV_OFFSET);
+
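+       /* The divider register encodes (divisor - 2), hence the "+ 2". */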
+       return div_u64(parent_rate, div + 2);
+}
+
+/*
+ * Core clock mux control
+ */
+
+/**
+ * sifive_prci_coreclksel_use_hfclk() - switch the CORECLK mux to output HFCLK
+ * @pd: struct __prci_data * for the PRCI containing the CORECLK mux reg
+ *
+ * Switch the CORECLK mux to the HFCLK input source; return once complete.
+ *
+ * Context: Any context.  Caller must prevent concurrent changes to the
+ *          PRCI_CORECLKSEL_OFFSET register.
+ */
+void sifive_prci_coreclksel_use_hfclk(struct __prci_data *pd)
+{
+       u32 r;
+
+       r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET);
+       r |= PRCI_CORECLKSEL_CORECLKSEL_MASK;
+       __prci_writel(r, PRCI_CORECLKSEL_OFFSET, pd);
+
+       r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET);   /* barrier */
+}
+
+/**
+ * sifive_prci_coreclksel_use_corepll() - switch the CORECLK mux to output
+ * COREPLL
+ * @pd: struct __prci_data * for the PRCI containing the CORECLK mux reg
+ *
+ * Switch the CORECLK mux to the COREPLL output clock; return once complete.
+ *
+ * Context: Any context.  Caller must prevent concurrent changes to the
+ *          PRCI_CORECLKSEL_OFFSET register.
+ */
+void sifive_prci_coreclksel_use_corepll(struct __prci_data *pd)
+{
+       u32 r;
+
+       r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET);
+       r &= ~PRCI_CORECLKSEL_CORECLKSEL_MASK;
+       __prci_writel(r, PRCI_CORECLKSEL_OFFSET, pd);
+
+       r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET);   /* barrier */
+}
+
+/**
+ * sifive_prci_coreclksel_use_final_corepll() - switch the CORECLK mux to output
+ * FINAL_COREPLL
+ * @pd: struct __prci_data * for the PRCI containing the CORECLK mux reg
+ *
+ * Switch the CORECLK mux to the final COREPLL output clock; return once
+ * complete.
+ *
+ * Context: Any context.  Caller must prevent concurrent changes to the
+ *          PRCI_CORECLKSEL_OFFSET register.
+ */
+void sifive_prci_coreclksel_use_final_corepll(struct __prci_data *pd)
+{
+       u32 r;
+
+       r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET);
+       r &= ~PRCI_CORECLKSEL_CORECLKSEL_MASK;
+       __prci_writel(r, PRCI_CORECLKSEL_OFFSET, pd);
+
+       r = __prci_readl(pd, PRCI_CORECLKSEL_OFFSET);   /* barrier */
+}
+
+/**
+ * sifive_prci_corepllsel_use_dvfscorepll() - switch the COREPLL mux to
+ * output DVFS_COREPLL
+ * @pd: struct __prci_data * for the PRCI containing the COREPLL mux reg
+ *
+ * Switch the COREPLL mux to the DVFSCOREPLL output clock; return once complete.
+ *
+ * Context: Any context.  Caller must prevent concurrent changes to the
+ *          PRCI_COREPLLSEL_OFFSET register.
+ */
+void sifive_prci_corepllsel_use_dvfscorepll(struct __prci_data *pd)
+{
+       u32 r;
+
+       r = __prci_readl(pd, PRCI_COREPLLSEL_OFFSET);
+       r |= PRCI_COREPLLSEL_COREPLLSEL_MASK;
+       __prci_writel(r, PRCI_COREPLLSEL_OFFSET, pd);
+
+       r = __prci_readl(pd, PRCI_COREPLLSEL_OFFSET);   /* barrier */
+}
+
+/**
+ * sifive_prci_corepllsel_use_corepll() - switch the COREPLL mux to
+ * output COREPLL
+ * @pd: struct __prci_data * for the PRCI containing the COREPLL mux reg
+ *
+ * Switch the COREPLL mux to the COREPLL output clock; return once complete.
+ *
+ * Context: Any context.  Caller must prevent concurrent changes to the
+ *          PRCI_COREPLLSEL_OFFSET register.
+ */
+void sifive_prci_corepllsel_use_corepll(struct __prci_data *pd)
+{
+       u32 r;
+
+       r = __prci_readl(pd, PRCI_COREPLLSEL_OFFSET);
+       r &= ~PRCI_COREPLLSEL_COREPLLSEL_MASK;
+       __prci_writel(r, PRCI_COREPLLSEL_OFFSET, pd);
+
+       r = __prci_readl(pd, PRCI_COREPLLSEL_OFFSET);   /* barrier */
+}
+
+/**
+ * sifive_prci_hfpclkpllsel_use_hfclk() - switch the HFPCLKPLL mux to
+ * output HFCLK
+ * @pd: struct __prci_data * for the PRCI containing the HFPCLKPLL mux reg
+ *
+ * Switch the HFPCLKPLL mux to the HFCLK input source; return once complete.
+ *
+ * Context: Any context.  Caller must prevent concurrent changes to the
+ *          PRCI_HFPCLKPLLSEL_OFFSET register.
+ */
+void sifive_prci_hfpclkpllsel_use_hfclk(struct __prci_data *pd)
+{
+       u32 r;
+
+       r = __prci_readl(pd, PRCI_HFPCLKPLLSEL_OFFSET);
+       r |= PRCI_HFPCLKPLLSEL_HFPCLKPLLSEL_MASK;
+       __prci_writel(r, PRCI_HFPCLKPLLSEL_OFFSET, pd);
+
+       r = __prci_readl(pd, PRCI_HFPCLKPLLSEL_OFFSET); /* barrier */
+}
+
+/**
+ * sifive_prci_hfpclkpllsel_use_hfpclkpll() - switch the HFPCLKPLL mux to
+ * output HFPCLKPLL
+ * @pd: struct __prci_data * for the PRCI containing the HFPCLKPLL mux reg
+ *
+ * Switch the HFPCLKPLL mux to the HFPCLKPLL output clock; return once complete.
+ *
+ * Context: Any context.  Caller must prevent concurrent changes to the
+ *          PRCI_HFPCLKPLLSEL_OFFSET register.
+ */
+void sifive_prci_hfpclkpllsel_use_hfpclkpll(struct __prci_data *pd)
+{
+       u32 r;
+
+       r = __prci_readl(pd, PRCI_HFPCLKPLLSEL_OFFSET);
+       r &= ~PRCI_HFPCLKPLLSEL_HFPCLKPLLSEL_MASK;
+       __prci_writel(r, PRCI_HFPCLKPLLSEL_OFFSET, pd);
+
+       r = __prci_readl(pd, PRCI_HFPCLKPLLSEL_OFFSET); /* barrier */
+}
+
+/**
+ * __prci_register_clocks() - register clock controls in the PRCI
+ * @dev: Linux struct device
+ * @pd: pointer to the PRCI per-device instance data
+ * @desc: pointer to the per-SoC clock description data
+ *
+ * Register the list of clock controls described in __prci_init_clocks[] with
+ * the Linux clock framework.
+ *
+ * Return: 0 upon success or a negative error code upon failure.
+ */
+static int __prci_register_clocks(struct device *dev, struct __prci_data *pd,
+                                 const struct prci_clk_desc *desc)
+{
+       struct clk_init_data init = { };
+       struct __prci_clock *pic;
+       int parent_count, i, r;
+
+       parent_count = of_clk_get_parent_count(dev->of_node);
+       if (parent_count != EXPECTED_CLK_PARENT_COUNT) {
+               dev_err(dev, "expected only two parent clocks, found %d\n",
+                       parent_count);
+               return -EINVAL;
+       }
+
+       /* Register PLLs */
+       for (i = 0; i < desc->num_clks; ++i) {
+               pic = &(desc->clks[i]);
+
+               init.name = pic->name;
+               init.parent_names = &pic->parent_name;
+               init.num_parents = 1;
+               init.ops = pic->ops;
+               pic->hw.init = &init;
+
+               pic->pd = pd;
+
+               if (pic->pwd)
+                       __prci_wrpll_read_cfg0(pd, pic->pwd);
+
+               r = devm_clk_hw_register(dev, &pic->hw);
+               if (r) {
+                       dev_warn(dev, "Failed to register clock %s: %d\n",
+                                init.name, r);
+                       return r;
+               }
+
+               r = clk_hw_register_clkdev(&pic->hw, pic->name, dev_name(dev));
+               if (r) {
+                       dev_warn(dev, "Failed to register clkdev for %s: %d\n",
+                                init.name, r);
+                       return r;
+               }
+
+               pd->hw_clks.hws[i] = &pic->hw;
+       }
+
+       pd->hw_clks.num = i;
+
+       r = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
+                                       &pd->hw_clks);
+       if (r) {
+               dev_err(dev, "could not add hw_provider: %d\n", r);
+               return r;
+       }
+
+       return 0;
+}
+
+/**
+ * sifive_prci_probe() - initialize PRCI data and check the parent count
+ * @pdev: platform device pointer for the PRCI
+ *
+ * Return: 0 upon success or a negative error code upon failure.
+ */
+static int sifive_prci_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct resource *res;
+       struct __prci_data *pd;
+       const struct prci_clk_desc *desc;
+       int r;
+
+       desc = of_device_get_match_data(&pdev->dev);
+
+       pd = devm_kzalloc(dev, struct_size(pd, hw_clks.hws, desc->num_clks), GFP_KERNEL);
+       if (!pd)
+               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       pd->va = devm_ioremap_resource(dev, res);
+       if (IS_ERR(pd->va))
+               return PTR_ERR(pd->va);
+
+       r = __prci_register_clocks(dev, pd, desc);
+       if (r) {
+               dev_err(dev, "could not register clocks: %d\n", r);
+               return r;
+       }
+
+       dev_dbg(dev, "SiFive PRCI probed\n");
+
+       return 0;
+}
+
+static const struct of_device_id sifive_prci_of_match[] = {
+       {.compatible = "sifive,fu540-c000-prci", .data = &prci_clk_fu540},
+       {.compatible = "sifive,fu740-c000-prci", .data = &prci_clk_fu740},
+       {}
+};
+
+static struct platform_driver sifive_prci_driver = {
+       .driver = {
+               .name = "sifive-clk-prci",
+               .of_match_table = sifive_prci_of_match,
+       },
+       .probe = sifive_prci_probe,
+};
+
+static int __init sifive_prci_init(void)
+{
+       return platform_driver_register(&sifive_prci_driver);
+}
+core_initcall(sifive_prci_init);
diff --git a/drivers/clk/sifive/sifive-prci.h b/drivers/clk/sifive/sifive-prci.h
new file mode 100644 (file)
index 0000000..dbdbd17
--- /dev/null
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018-2019 SiFive, Inc.
+ * Wesley Terpstra
+ * Paul Walmsley
+ * Zong Li
+ */
+
+#ifndef __SIFIVE_CLK_SIFIVE_PRCI_H
+#define __SIFIVE_CLK_SIFIVE_PRCI_H
+
+#include <linux/clk/analogbits-wrpll-cln28hpc.h>
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+
+/*
+ * EXPECTED_CLK_PARENT_COUNT: how many parent clocks this driver expects:
+ *     hfclk and rtcclk
+ */
+#define EXPECTED_CLK_PARENT_COUNT 2
+
+/*
+ * Register offsets and bitmasks
+ */
+
+/* COREPLLCFG0 */
+#define PRCI_COREPLLCFG0_OFFSET                0x4
+#define PRCI_COREPLLCFG0_DIVR_SHIFT    0
+#define PRCI_COREPLLCFG0_DIVR_MASK     (0x3f << PRCI_COREPLLCFG0_DIVR_SHIFT)
+#define PRCI_COREPLLCFG0_DIVF_SHIFT    6
+#define PRCI_COREPLLCFG0_DIVF_MASK     (0x1ff << PRCI_COREPLLCFG0_DIVF_SHIFT)
+#define PRCI_COREPLLCFG0_DIVQ_SHIFT    15
+#define PRCI_COREPLLCFG0_DIVQ_MASK     (0x7 << PRCI_COREPLLCFG0_DIVQ_SHIFT)
+#define PRCI_COREPLLCFG0_RANGE_SHIFT   18
+#define PRCI_COREPLLCFG0_RANGE_MASK    (0x7 << PRCI_COREPLLCFG0_RANGE_SHIFT)
+#define PRCI_COREPLLCFG0_BYPASS_SHIFT  24
+#define PRCI_COREPLLCFG0_BYPASS_MASK   (0x1 << PRCI_COREPLLCFG0_BYPASS_SHIFT)
+#define PRCI_COREPLLCFG0_FSE_SHIFT     25
+#define PRCI_COREPLLCFG0_FSE_MASK      (0x1 << PRCI_COREPLLCFG0_FSE_SHIFT)
+#define PRCI_COREPLLCFG0_LOCK_SHIFT    31
+#define PRCI_COREPLLCFG0_LOCK_MASK     (0x1 << PRCI_COREPLLCFG0_LOCK_SHIFT)
+
+/* COREPLLCFG1 */
+#define PRCI_COREPLLCFG1_OFFSET                0x8
+#define PRCI_COREPLLCFG1_CKE_SHIFT     31
+#define PRCI_COREPLLCFG1_CKE_MASK      (0x1 << PRCI_COREPLLCFG1_CKE_SHIFT)
+
+/* DDRPLLCFG0 */
+#define PRCI_DDRPLLCFG0_OFFSET         0xc
+#define PRCI_DDRPLLCFG0_DIVR_SHIFT     0
+#define PRCI_DDRPLLCFG0_DIVR_MASK      (0x3f << PRCI_DDRPLLCFG0_DIVR_SHIFT)
+#define PRCI_DDRPLLCFG0_DIVF_SHIFT     6
+#define PRCI_DDRPLLCFG0_DIVF_MASK      (0x1ff << PRCI_DDRPLLCFG0_DIVF_SHIFT)
+#define PRCI_DDRPLLCFG0_DIVQ_SHIFT     15
+#define PRCI_DDRPLLCFG0_DIVQ_MASK      (0x7 << PRCI_DDRPLLCFG0_DIVQ_SHIFT)
+#define PRCI_DDRPLLCFG0_RANGE_SHIFT    18
+#define PRCI_DDRPLLCFG0_RANGE_MASK     (0x7 << PRCI_DDRPLLCFG0_RANGE_SHIFT)
+#define PRCI_DDRPLLCFG0_BYPASS_SHIFT   24
+#define PRCI_DDRPLLCFG0_BYPASS_MASK    (0x1 << PRCI_DDRPLLCFG0_BYPASS_SHIFT)
+#define PRCI_DDRPLLCFG0_FSE_SHIFT      25
+#define PRCI_DDRPLLCFG0_FSE_MASK       (0x1 << PRCI_DDRPLLCFG0_FSE_SHIFT)
+#define PRCI_DDRPLLCFG0_LOCK_SHIFT     31
+#define PRCI_DDRPLLCFG0_LOCK_MASK      (0x1 << PRCI_DDRPLLCFG0_LOCK_SHIFT)
+
+/* DDRPLLCFG1 */
+#define PRCI_DDRPLLCFG1_OFFSET         0x10
+#define PRCI_DDRPLLCFG1_CKE_SHIFT      31
+#define PRCI_DDRPLLCFG1_CKE_MASK       (0x1 << PRCI_DDRPLLCFG1_CKE_SHIFT)
+
+/* GEMGXLPLLCFG0 */
+#define PRCI_GEMGXLPLLCFG0_OFFSET      0x1c
+#define PRCI_GEMGXLPLLCFG0_DIVR_SHIFT  0
+#define PRCI_GEMGXLPLLCFG0_DIVR_MASK   (0x3f << PRCI_GEMGXLPLLCFG0_DIVR_SHIFT)
+#define PRCI_GEMGXLPLLCFG0_DIVF_SHIFT  6
+#define PRCI_GEMGXLPLLCFG0_DIVF_MASK   (0x1ff << PRCI_GEMGXLPLLCFG0_DIVF_SHIFT)
+#define PRCI_GEMGXLPLLCFG0_DIVQ_SHIFT  15
+#define PRCI_GEMGXLPLLCFG0_DIVQ_MASK   (0x7 << PRCI_GEMGXLPLLCFG0_DIVQ_SHIFT)
+#define PRCI_GEMGXLPLLCFG0_RANGE_SHIFT 18
+#define PRCI_GEMGXLPLLCFG0_RANGE_MASK  (0x7 << PRCI_GEMGXLPLLCFG0_RANGE_SHIFT)
+#define PRCI_GEMGXLPLLCFG0_BYPASS_SHIFT        24
+#define PRCI_GEMGXLPLLCFG0_BYPASS_MASK (0x1 << PRCI_GEMGXLPLLCFG0_BYPASS_SHIFT)
+#define PRCI_GEMGXLPLLCFG0_FSE_SHIFT   25
+#define PRCI_GEMGXLPLLCFG0_FSE_MASK    (0x1 << PRCI_GEMGXLPLLCFG0_FSE_SHIFT)
+#define PRCI_GEMGXLPLLCFG0_LOCK_SHIFT  31
+#define PRCI_GEMGXLPLLCFG0_LOCK_MASK   (0x1 << PRCI_GEMGXLPLLCFG0_LOCK_SHIFT)
+
+/* GEMGXLPLLCFG1 */
+#define PRCI_GEMGXLPLLCFG1_OFFSET      0x20
+#define PRCI_GEMGXLPLLCFG1_CKE_SHIFT   31
+#define PRCI_GEMGXLPLLCFG1_CKE_MASK    (0x1 << PRCI_GEMGXLPLLCFG1_CKE_SHIFT)
+
+/* CORECLKSEL */
+#define PRCI_CORECLKSEL_OFFSET                 0x24
+#define PRCI_CORECLKSEL_CORECLKSEL_SHIFT       0
+#define PRCI_CORECLKSEL_CORECLKSEL_MASK                                        \
+               (0x1 << PRCI_CORECLKSEL_CORECLKSEL_SHIFT)
+
+/* DEVICESRESETREG */
+#define PRCI_DEVICESRESETREG_OFFSET                            0x28
+#define PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_SHIFT              0
+#define PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_MASK                       \
+               (0x1 << PRCI_DEVICESRESETREG_DDR_CTRL_RST_N_SHIFT)
+#define PRCI_DEVICESRESETREG_DDR_AXI_RST_N_SHIFT               1
+#define PRCI_DEVICESRESETREG_DDR_AXI_RST_N_MASK                                \
+               (0x1 << PRCI_DEVICESRESETREG_DDR_AXI_RST_N_SHIFT)
+#define PRCI_DEVICESRESETREG_DDR_AHB_RST_N_SHIFT               2
+#define PRCI_DEVICESRESETREG_DDR_AHB_RST_N_MASK                                \
+               (0x1 << PRCI_DEVICESRESETREG_DDR_AHB_RST_N_SHIFT)
+#define PRCI_DEVICESRESETREG_DDR_PHY_RST_N_SHIFT               3
+#define PRCI_DEVICESRESETREG_DDR_PHY_RST_N_MASK                                \
+               (0x1 << PRCI_DEVICESRESETREG_DDR_PHY_RST_N_SHIFT)
+#define PRCI_DEVICESRESETREG_GEMGXL_RST_N_SHIFT                        5
+#define PRCI_DEVICESRESETREG_GEMGXL_RST_N_MASK                         \
+               (0x1 << PRCI_DEVICESRESETREG_GEMGXL_RST_N_SHIFT)
+#define PRCI_DEVICESRESETREG_CHIPLINK_RST_N_SHIFT              6
+#define PRCI_DEVICESRESETREG_CHIPLINK_RST_N_MASK                       \
+               (0x1 << PRCI_DEVICESRESETREG_CHIPLINK_RST_N_SHIFT)
+
+/* CLKMUXSTATUSREG */
+#define PRCI_CLKMUXSTATUSREG_OFFSET                            0x2c
+#define PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_SHIFT             1
+#define PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_MASK                      \
+               (0x1 << PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_SHIFT)
+
+/* CLTXPLLCFG0 */
+#define PRCI_CLTXPLLCFG0_OFFSET                0x30
+#define PRCI_CLTXPLLCFG0_DIVR_SHIFT    0
+#define PRCI_CLTXPLLCFG0_DIVR_MASK     (0x3f << PRCI_CLTXPLLCFG0_DIVR_SHIFT)
+#define PRCI_CLTXPLLCFG0_DIVF_SHIFT    6
+#define PRCI_CLTXPLLCFG0_DIVF_MASK     (0x1ff << PRCI_CLTXPLLCFG0_DIVF_SHIFT)
+#define PRCI_CLTXPLLCFG0_DIVQ_SHIFT    15
+#define PRCI_CLTXPLLCFG0_DIVQ_MASK     (0x7 << PRCI_CLTXPLLCFG0_DIVQ_SHIFT)
+#define PRCI_CLTXPLLCFG0_RANGE_SHIFT   18
+#define PRCI_CLTXPLLCFG0_RANGE_MASK    (0x7 << PRCI_CLTXPLLCFG0_RANGE_SHIFT)
+#define PRCI_CLTXPLLCFG0_BYPASS_SHIFT  24
+#define PRCI_CLTXPLLCFG0_BYPASS_MASK   (0x1 << PRCI_CLTXPLLCFG0_BYPASS_SHIFT)
+#define PRCI_CLTXPLLCFG0_FSE_SHIFT     25
+#define PRCI_CLTXPLLCFG0_FSE_MASK      (0x1 << PRCI_CLTXPLLCFG0_FSE_SHIFT)
+#define PRCI_CLTXPLLCFG0_LOCK_SHIFT    31
+#define PRCI_CLTXPLLCFG0_LOCK_MASK     (0x1 << PRCI_CLTXPLLCFG0_LOCK_SHIFT)
+
+/* CLTXPLLCFG1 */
+#define PRCI_CLTXPLLCFG1_OFFSET                0x34
+#define PRCI_CLTXPLLCFG1_CKE_SHIFT     31
+#define PRCI_CLTXPLLCFG1_CKE_MASK      (0x1 << PRCI_CLTXPLLCFG1_CKE_SHIFT)
+
+/* DVFSCOREPLLCFG0 */
+#define PRCI_DVFSCOREPLLCFG0_OFFSET    0x38
+
+/* DVFSCOREPLLCFG1 */
+#define PRCI_DVFSCOREPLLCFG1_OFFSET    0x3c
+#define PRCI_DVFSCOREPLLCFG1_CKE_SHIFT 31
+#define PRCI_DVFSCOREPLLCFG1_CKE_MASK  (0x1 << PRCI_DVFSCOREPLLCFG1_CKE_SHIFT)
+
+/* COREPLLSEL */
+#define PRCI_COREPLLSEL_OFFSET                 0x40
+#define PRCI_COREPLLSEL_COREPLLSEL_SHIFT       0
+#define PRCI_COREPLLSEL_COREPLLSEL_MASK                                        \
+               (0x1 << PRCI_COREPLLSEL_COREPLLSEL_SHIFT)
+
+/* HFPCLKPLLCFG0 */
+#define PRCI_HFPCLKPLLCFG0_OFFSET              0x50
+#define PRCI_HFPCLKPLL_CFG0_DIVR_SHIFT         0
+#define PRCI_HFPCLKPLL_CFG0_DIVR_MASK                                  \
+               (0x3f << PRCI_HFPCLKPLL_CFG0_DIVR_SHIFT)
+#define PRCI_HFPCLKPLL_CFG0_DIVF_SHIFT         6
+#define PRCI_HFPCLKPLL_CFG0_DIVF_MASK                                  \
+               (0x1ff << PRCI_HFPCLKPLL_CFG0_DIVF_SHIFT)
+#define PRCI_HFPCLKPLL_CFG0_DIVQ_SHIFT         15
+#define PRCI_HFPCLKPLL_CFG0_DIVQ_MASK                                  \
+               (0x7 << PRCI_HFPCLKPLL_CFG0_DIVQ_SHIFT)
+#define PRCI_HFPCLKPLL_CFG0_RANGE_SHIFT                18
+#define PRCI_HFPCLKPLL_CFG0_RANGE_MASK                                 \
+               (0x7 << PRCI_HFPCLKPLL_CFG0_RANGE_SHIFT)
+#define PRCI_HFPCLKPLL_CFG0_BYPASS_SHIFT       24
+#define PRCI_HFPCLKPLL_CFG0_BYPASS_MASK                                        \
+               (0x1 << PRCI_HFPCLKPLL_CFG0_BYPASS_SHIFT)
+#define PRCI_HFPCLKPLL_CFG0_FSE_SHIFT          25
+#define PRCI_HFPCLKPLL_CFG0_FSE_MASK                                   \
+               (0x1 << PRCI_HFPCLKPLL_CFG0_FSE_SHIFT)
+#define PRCI_HFPCLKPLL_CFG0_LOCK_SHIFT         31
+#define PRCI_HFPCLKPLL_CFG0_LOCK_MASK                                  \
+               (0x1 << PRCI_HFPCLKPLL_CFG0_LOCK_SHIFT)
+
+/* HFPCLKPLLCFG1 */
+#define PRCI_HFPCLKPLLCFG1_OFFSET              0x54
+#define PRCI_HFPCLKPLLCFG1_CKE_SHIFT           31
+#define PRCI_HFPCLKPLLCFG1_CKE_MASK                                    \
+               (0x1 << PRCI_HFPCLKPLLCFG1_CKE_SHIFT)
+
+/* HFPCLKPLLSEL */
+#define PRCI_HFPCLKPLLSEL_OFFSET               0x58
+#define PRCI_HFPCLKPLLSEL_HFPCLKPLLSEL_SHIFT   0
+#define PRCI_HFPCLKPLLSEL_HFPCLKPLLSEL_MASK                            \
+               (0x1 << PRCI_HFPCLKPLLSEL_HFPCLKPLLSEL_SHIFT)
+
+/* HFPCLKPLLDIV */
+#define PRCI_HFPCLKPLLDIV_OFFSET               0x5c
+
+/* PRCIPLL */
+#define PRCI_PRCIPLL_OFFSET                    0xe0
+
+/* PROCMONCFG */
+#define PRCI_PROCMONCFG_OFFSET                 0xf0
+
+/*
+ * Private structures
+ */
+
+/**
+ * struct __prci_data - per-device-instance data
+ * @va: base virtual address of the PRCI IP block
+ * @hw_clks: encapsulates struct clk_hw records
+ *
+ * PRCI per-device instance data
+ */
+struct __prci_data {
+       void __iomem *va;
+       struct clk_hw_onecell_data hw_clks;
+};
+
+/**
+ * struct __prci_wrpll_data - WRPLL configuration and integration data
+ * @c: WRPLL current configuration record
+ * @enable_bypass: fn ptr to code to bypass the WRPLL (if applicable; else NULL)
+ * @disable_bypass: fn ptr to code to stop bypassing the WRPLL (or NULL)
+ * @cfg0_offs: WRPLL CFG0 register offset (in bytes) from the PRCI base address
+ * @cfg1_offs: WRPLL CFG1 register offset (in bytes) from the PRCI base address
+ *
+ * @enable_bypass and @disable_bypass are used for WRPLL instances
+ * that contain a separate external glitchless clock mux downstream
+ * from the PLL.  The WRPLL internal bypass mux is not glitchless.
+ */
+struct __prci_wrpll_data {
+       struct wrpll_cfg c;
+       void (*enable_bypass)(struct __prci_data *pd);
+       void (*disable_bypass)(struct __prci_data *pd);
+       u8 cfg0_offs;
+       u8 cfg1_offs;
+};
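A minimal sketch of how a WRPLL instance could wire these hooks, assuming
the COREPLL offsets above and the CORECLK mux helpers declared below (the
actual per-SoC tables are defined in the fu540/fu740 source files):

	static struct __prci_wrpll_data __prci_corepll_data_sketch = {
		.cfg0_offs = PRCI_COREPLLCFG0_OFFSET,
		.cfg1_offs = PRCI_COREPLLCFG1_OFFSET,
		.enable_bypass = sifive_prci_coreclksel_use_hfclk,    /* glitchless mux to HFCLK */
		.disable_bypass = sifive_prci_coreclksel_use_corepll, /* back to COREPLL */
	};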
+
+/**
+ * struct __prci_clock - describes a clock device managed by PRCI
+ * @name: user-readable clock name string - should match the manual
+ * @parent_name: parent name for this clock
+ * @ops: struct clk_ops for the Linux clock framework to use for control
+ * @hw: Linux-private clock data
+ * @pwd: WRPLL-specific data associated with this clock (if not NULL)
+ * @pd: PRCI-specific data associated with this clock (if not NULL)
+ *
+ * PRCI clock data.  Used by the PRCI driver to register PRCI-provided
+ * clocks with the Linux clock infrastructure.
+ */
+struct __prci_clock {
+       const char *name;
+       const char *parent_name;
+       const struct clk_ops *ops;
+       struct clk_hw hw;
+       struct __prci_wrpll_data *pwd;
+       struct __prci_data *pd;
+};
+
+#define clk_hw_to_prci_clock(pwd) container_of(pwd, struct __prci_clock, hw)
+
+/*
+ * struct prci_clk_desc - describes the clocks provided by a given SoC
+ * @clks: pointer to an array of struct __prci_clock
+ * @num_clks: the number of elements in @clks
+ */
+struct prci_clk_desc {
+       struct __prci_clock *clks;
+       size_t num_clks;
+};
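A minimal sketch of a per-SoC descriptor, with hypothetical entries; probe
code retrieves the matching descriptor via of_device_get_match_data():

	static struct __prci_clock __prci_init_clocks_sketch[] = {
		{ .name = "corepll", .parent_name = "hfclk" },
		/* remaining PLLs and divider clocks elided */
	};

	static const struct prci_clk_desc prci_clk_desc_sketch = {
		.clks = __prci_init_clocks_sketch,
		.num_clks = ARRAY_SIZE(__prci_init_clocks_sketch),
	};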
+
+/* Core clock mux control */
+void sifive_prci_coreclksel_use_hfclk(struct __prci_data *pd);
+void sifive_prci_coreclksel_use_corepll(struct __prci_data *pd);
+void sifive_prci_coreclksel_use_final_corepll(struct __prci_data *pd);
+void sifive_prci_corepllsel_use_dvfscorepll(struct __prci_data *pd);
+void sifive_prci_corepllsel_use_corepll(struct __prci_data *pd);
+void sifive_prci_hfpclkpllsel_use_hfclk(struct __prci_data *pd);
+void sifive_prci_hfpclkpllsel_use_hfpclkpll(struct __prci_data *pd);
+
+/* Linux clock framework integration */
+long sifive_prci_wrpll_round_rate(struct clk_hw *hw, unsigned long rate,
+                                 unsigned long *parent_rate);
+int sifive_prci_wrpll_set_rate(struct clk_hw *hw, unsigned long rate,
+                              unsigned long parent_rate);
+int sifive_clk_is_enabled(struct clk_hw *hw);
+int sifive_prci_clock_enable(struct clk_hw *hw);
+void sifive_prci_clock_disable(struct clk_hw *hw);
+unsigned long sifive_prci_wrpll_recalc_rate(struct clk_hw *hw,
+                                           unsigned long parent_rate);
+unsigned long sifive_prci_tlclksel_recalc_rate(struct clk_hw *hw,
+                                              unsigned long parent_rate);
+unsigned long sifive_prci_hfpclkplldiv_recalc_rate(struct clk_hw *hw,
+                                                  unsigned long parent_rate);
+
+#endif /* __SIFIVE_CLK_SIFIVE_PRCI_H */
index 5f66bf8..149cfde 100644 (file)
@@ -389,6 +389,7 @@ static struct clk_div_table ths_div_table[] = {
        { .val = 1, .div = 2 },
        { .val = 2, .div = 4 },
        { .val = 3, .div = 6 },
+       { /* Sentinel */ },
 };
 static const char * const ths_parents[] = { "osc24M" };
 static struct ccu_div ths_clk = {
index 6b63636..7e629a4 100644 (file)
@@ -322,6 +322,7 @@ static struct clk_div_table ths_div_table[] = {
        { .val = 1, .div = 2 },
        { .val = 2, .div = 4 },
        { .val = 3, .div = 6 },
+       { /* Sentinel */ },
 };
 static SUNXI_CCU_DIV_TABLE_WITH_GATE(ths_clk, "ths", "osc24M",
                                     0x074, 0, 2, ths_div_table, BIT(31), 0);
index a66263b..6ecf18f 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2016 NVIDIA Corporation
+ * Copyright (C) 2016-2020 NVIDIA Corporation
  */
 
 #include <linux/clk-provider.h>
@@ -174,7 +174,7 @@ static long tegra_bpmp_clk_round_rate(struct clk_hw *hw, unsigned long rate,
        int err;
 
        memset(&request, 0, sizeof(request));
-       request.rate = rate;
+       request.rate = min_t(u64, rate, S64_MAX);
 
        memset(&msg, 0, sizeof(msg));
        msg.cmd = CMD_CLK_ROUND_RATE;
@@ -256,7 +256,7 @@ static int tegra_bpmp_clk_set_rate(struct clk_hw *hw, unsigned long rate,
        struct tegra_bpmp_clk_message msg;
 
        memset(&request, 0, sizeof(request));
-       request.rate = rate;
+       request.rate = min_t(u64, rate, S64_MAX);
 
        memset(&msg, 0, sizeof(msg));
        msg.cmd = CMD_CLK_SET_RATE;
index cfbaa90..a5f526b 100644 (file)
@@ -1856,13 +1856,13 @@ static int dfll_fetch_pwm_params(struct tegra_dfll *td)
                            &td->reg_init_uV);
        if (!ret) {
                dev_err(td->dev, "couldn't get initialized voltage\n");
-               return ret;
+               return -EINVAL;
        }
 
        ret = read_dt_param(td, "nvidia,pwm-period-nanoseconds", &pwm_period);
        if (!ret) {
                dev_err(td->dev, "couldn't get PWM period\n");
-               return ret;
+               return -EINVAL;
        }
        td->pwm_rate = (NSEC_PER_SEC / pwm_period) * (MAX_DFLL_VOLTAGES - 1);
 
index ff7da2d..2441381 100644 (file)
@@ -227,6 +227,7 @@ enum clk_id {
        tegra_clk_sdmmc4,
        tegra_clk_sdmmc4_8,
        tegra_clk_se,
+       tegra_clk_se_10,
        tegra_clk_soc_therm,
        tegra_clk_soc_therm_8,
        tegra_clk_sor0,
index 2b2a3b8..60cc34f 100644 (file)
@@ -630,7 +630,7 @@ static struct tegra_periph_init_data periph_clks[] = {
        INT8("host1x", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_HOST1X, 28, 0, tegra_clk_host1x_8),
        INT8("host1x", mux_pllc4_out1_pllc_pllc4_out2_pllp_clkm_plla_pllc4_out0, CLK_SOURCE_HOST1X, 28, 0, tegra_clk_host1x_9),
        INT8("se", mux_pllp_pllc2_c_c3_pllm_clkm, CLK_SOURCE_SE, 127, TEGRA_PERIPH_ON_APB, tegra_clk_se),
-       INT8("se", mux_pllp_pllc2_c_c3_clkm, CLK_SOURCE_SE, 127, TEGRA_PERIPH_ON_APB, tegra_clk_se),
+       INT8("se", mux_pllp_pllc2_c_c3_clkm, CLK_SOURCE_SE, 127, TEGRA_PERIPH_ON_APB, tegra_clk_se_10),
        INT8("2d", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_2D, 21, 0, tegra_clk_gr2d_8),
        INT8("3d", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_3D, 24, 0, tegra_clk_gr3d_8),
        INT8("vic03", mux_pllm_pllc_pllp_plla_pllc2_c3_clkm, CLK_SOURCE_VIC03, 178, 0, tegra_clk_vic03),
index 8694bc9..f054239 100644 (file)
@@ -605,7 +605,7 @@ static struct ti_dt_clk omap54xx_clks[] = {
 int __init omap5xxx_dt_clk_init(void)
 {
        int rc;
-       struct clk *abe_dpll_ref, *abe_dpll, *sys_32k_ck, *usb_dpll;
+       struct clk *abe_dpll_ref, *abe_dpll, *abe_dpll_byp, *sys_32k_ck, *usb_dpll;
 
        ti_dt_clocks_register(omap54xx_clks);
 
@@ -616,6 +616,16 @@ int __init omap5xxx_dt_clk_init(void)
        abe_dpll_ref = clk_get_sys(NULL, "abe_dpll_clk_mux");
        sys_32k_ck = clk_get_sys(NULL, "sys_32k_ck");
        rc = clk_set_parent(abe_dpll_ref, sys_32k_ck);
+
+       /*
+        * The ABE DPLL bypass clock mux must also be set to sys_32k_ck
+        * to match, or the ABE DPLL will not lock on a warm reboot when
+        * ABE timers are used.
+        */
+       abe_dpll_byp = clk_get_sys(NULL, "abe_dpll_bypass_clk_mux");
+       if (!rc)
+               rc = clk_set_parent(abe_dpll_byp, sys_32k_ck);
+
        abe_dpll = clk_get_sys(NULL, "dpll_abe_ck");
        if (!rc)
                rc = clk_set_rate(abe_dpll, OMAP5_DPLL_ABE_DEFFREQ);
index 95e36ba..8024c6d 100644 (file)
@@ -498,6 +498,7 @@ static struct clk * __init ti_fapll_synth_setup(struct fapll_data *fd,
 {
        struct clk_init_data *init;
        struct fapll_synth *synth;
+       struct clk *clk = ERR_PTR(-ENOMEM);
 
        init = kzalloc(sizeof(*init), GFP_KERNEL);
        if (!init)
@@ -520,13 +521,19 @@ static struct clk * __init ti_fapll_synth_setup(struct fapll_data *fd,
        synth->hw.init = init;
        synth->clk_pll = pll_clk;
 
-       return clk_register(NULL, &synth->hw);
+       clk = clk_register(NULL, &synth->hw);
+       if (IS_ERR(clk)) {
+               pr_err("failed to register clock\n");
+               goto free;
+       }
+
+       return clk;
 
 free:
        kfree(synth);
        kfree(init);
 
-       return ERR_PTR(-ENOMEM);
+       return clk;
 }
 
 static void __init ti_fapll_setup(struct device_node *node)
index 7cc9bd8..8a482c4 100644 (file)
 #define DMI_PROCESSOR_MAX_SPEED                0x14
 
 /*
- * These structs contain information parsed from per CPU
- * ACPI _CPC structures.
- * e.g. For each CPU the highest, lowest supported
- * performance capabilities, desired performance level
- * requested etc.
+ * This list contains information parsed from per-CPU ACPI _CPC and _PSD
+ * structures: e.g. the highest and lowest supported performance
+ * capabilities, the desired performance level requested, etc.  Depending
+ * on the share_type, not all CPUs will have an entry in the list.
  */
-static struct cppc_cpudata **all_cpu_data;
+static LIST_HEAD(cpu_data_list);
+
 static bool boost_supported;
 
 struct cppc_workaround_oem_info {
@@ -148,8 +148,10 @@ static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu_data,
 static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
                                   unsigned int target_freq,
                                   unsigned int relation)
 {
-       struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu];
+       struct cppc_cpudata *cpu_data = policy->driver_data;
+       unsigned int cpu = policy->cpu;
        struct cpufreq_freqs freqs;
        u32 desired_perf;
        int ret = 0;
@@ -164,12 +166,12 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
        freqs.new = target_freq;
 
        cpufreq_freq_transition_begin(policy, &freqs);
-       ret = cppc_set_perf(cpu_data->cpu, &cpu_data->perf_ctrls);
+       ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
        cpufreq_freq_transition_end(policy, &freqs, ret != 0);
 
        if (ret)
                pr_debug("Failed to set target on CPU:%d. ret:%d\n",
-                        cpu_data->cpu, ret);
+                        cpu, ret);
 
        return ret;
 }
@@ -182,7 +184,7 @@ static int cppc_verify_policy(struct cpufreq_policy_data *policy)
 
 static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy)
 {
-       struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu];
+       struct cppc_cpudata *cpu_data = policy->driver_data;
        struct cppc_perf_caps *caps = &cpu_data->perf_caps;
        unsigned int cpu = policy->cpu;
        int ret;
@@ -193,6 +195,12 @@ static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy)
        if (ret)
                pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
                         caps->lowest_perf, cpu, ret);
+
+       /* Remove CPU node from list and free driver data for policy */
+       free_cpumask_var(cpu_data->shared_cpu_map);
+       list_del(&cpu_data->node);
+       kfree(policy->driver_data);
+       policy->driver_data = NULL;
 }
 
 /*
@@ -238,25 +246,61 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
 }
 #endif
 
-static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
+
+static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu)
 {
-       struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu];
-       struct cppc_perf_caps *caps = &cpu_data->perf_caps;
-       unsigned int cpu = policy->cpu;
-       int ret = 0;
+       struct cppc_cpudata *cpu_data;
+       int ret;
 
-       cpu_data->cpu = cpu;
-       ret = cppc_get_perf_caps(cpu, caps);
+       cpu_data = kzalloc(sizeof(struct cppc_cpudata), GFP_KERNEL);
+       if (!cpu_data)
+               goto out;
 
+       if (!zalloc_cpumask_var(&cpu_data->shared_cpu_map, GFP_KERNEL))
+               goto free_cpu;
+
+       ret = acpi_get_psd_map(cpu, cpu_data);
        if (ret) {
-               pr_debug("Err reading CPU%d perf capabilities. ret:%d\n",
-                        cpu, ret);
-               return ret;
+               pr_debug("Err parsing CPU%d PSD data: ret:%d\n", cpu, ret);
+               goto free_mask;
+       }
+
+       ret = cppc_get_perf_caps(cpu, &cpu_data->perf_caps);
+       if (ret) {
+               pr_debug("Err reading CPU%d perf caps: ret:%d\n", cpu, ret);
+               goto free_mask;
        }
 
        /* Convert the lowest and nominal freq from MHz to KHz */
-       caps->lowest_freq *= 1000;
-       caps->nominal_freq *= 1000;
+       cpu_data->perf_caps.lowest_freq *= 1000;
+       cpu_data->perf_caps.nominal_freq *= 1000;
+
+       list_add(&cpu_data->node, &cpu_data_list);
+
+       return cpu_data;
+
+free_mask:
+       free_cpumask_var(cpu_data->shared_cpu_map);
+free_cpu:
+       kfree(cpu_data);
+out:
+       return NULL;
+}
+
+static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+       unsigned int cpu = policy->cpu;
+       struct cppc_cpudata *cpu_data;
+       struct cppc_perf_caps *caps;
+       int ret;
+
+       cpu_data = cppc_cpufreq_get_cpu_data(cpu);
+       if (!cpu_data) {
+               pr_err("Error in acquiring _CPC/_PSD data for CPU%d.\n", cpu);
+               return -ENODEV;
+       }
+       caps = &cpu_data->perf_caps;
+       policy->driver_data = cpu_data;
 
        /*
         * Set min to lowest nonlinear perf to avoid any efficiency penalty (see
@@ -280,26 +324,25 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
        policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu);
        policy->shared_type = cpu_data->shared_type;
 
-       if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
-               int i;
-
+       switch (policy->shared_type) {
+       case CPUFREQ_SHARED_TYPE_HW:
+       case CPUFREQ_SHARED_TYPE_NONE:
+               /* Nothing to be done - we'll have a policy for each CPU */
+               break;
+       case CPUFREQ_SHARED_TYPE_ANY:
+               /*
+                * All CPUs in the domain will share a policy and all cpufreq
+                * operations will use a single cppc_cpudata structure stored
+                * in policy->driver_data.
+                */
                cpumask_copy(policy->cpus, cpu_data->shared_cpu_map);
-
-               for_each_cpu(i, policy->cpus) {
-                       if (unlikely(i == cpu))
-                               continue;
-
-                       memcpy(&all_cpu_data[i]->perf_caps, caps,
-                              sizeof(cpu_data->perf_caps));
-               }
-       } else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
-               /* Support only SW_ANY for now. */
-               pr_debug("Unsupported CPU co-ord type\n");
+               break;
+       default:
+               pr_debug("Unsupported CPU co-ord type: %d\n",
+                        policy->shared_type);
                return -EFAULT;
        }
 
-       cpu_data->cur_policy = policy;
-
        /*
         * If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost
         * is supported.
@@ -354,9 +397,12 @@ static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
 static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 {
        struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
-       struct cppc_cpudata *cpu_data = all_cpu_data[cpu];
+       struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+       struct cppc_cpudata *cpu_data = policy->driver_data;
        int ret;
 
+       cpufreq_cpu_put(policy);
+
        ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
        if (ret)
                return ret;
@@ -372,7 +418,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 
 static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
 {
-       struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu];
+       struct cppc_cpudata *cpu_data = policy->driver_data;
        struct cppc_perf_caps *caps = &cpu_data->perf_caps;
        int ret;
 
@@ -396,6 +442,19 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
        return 0;
 }
 
+static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf)
+{
+       struct cppc_cpudata *cpu_data = policy->driver_data;
+
+       return cpufreq_show_cpus(cpu_data->shared_cpu_map, buf);
+}
+cpufreq_freq_attr_ro(freqdomain_cpus);
+
+static struct freq_attr *cppc_cpufreq_attr[] = {
+       &freqdomain_cpus,
+       NULL,
+};
+
 static struct cpufreq_driver cppc_cpufreq_driver = {
        .flags = CPUFREQ_CONST_LOOPS,
        .verify = cppc_verify_policy,
@@ -404,6 +463,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = {
        .init = cppc_cpufreq_cpu_init,
        .stop_cpu = cppc_cpufreq_stop_cpu,
        .set_boost = cppc_cpufreq_set_boost,
+       .attr = cppc_cpufreq_attr,
        .name = "cppc_cpufreq",
 };
 
@@ -415,10 +475,13 @@ static struct cpufreq_driver cppc_cpufreq_driver = {
  */
 static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu)
 {
-       struct cppc_cpudata *cpu_data = all_cpu_data[cpu];
+       struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+       struct cppc_cpudata *cpu_data = policy->driver_data;
        u64 desired_perf;
        int ret;
 
+       cpufreq_cpu_put(policy);
+
        ret = cppc_get_desired_perf(cpu, &desired_perf);
        if (ret < 0)
                return -EIO;
@@ -451,68 +514,33 @@ static void cppc_check_hisi_workaround(void)
 
 static int __init cppc_cpufreq_init(void)
 {
-       struct cppc_cpudata *cpu_data;
-       int i, ret = 0;
-
-       if (acpi_disabled)
+       if (acpi_disabled || !acpi_cpc_valid())
                return -ENODEV;
 
-       all_cpu_data = kcalloc(num_possible_cpus(), sizeof(void *),
-                              GFP_KERNEL);
-       if (!all_cpu_data)
-               return -ENOMEM;
-
-       for_each_possible_cpu(i) {
-               all_cpu_data[i] = kzalloc(sizeof(struct cppc_cpudata), GFP_KERNEL);
-               if (!all_cpu_data[i])
-                       goto out;
-
-               cpu_data = all_cpu_data[i];
-               if (!zalloc_cpumask_var(&cpu_data->shared_cpu_map, GFP_KERNEL))
-                       goto out;
-       }
-
-       ret = acpi_get_psd_map(all_cpu_data);
-       if (ret) {
-               pr_debug("Error parsing PSD data. Aborting cpufreq registration.\n");
-               goto out;
-       }
+       INIT_LIST_HEAD(&cpu_data_list);
 
        cppc_check_hisi_workaround();
 
-       ret = cpufreq_register_driver(&cppc_cpufreq_driver);
-       if (ret)
-               goto out;
+       return cpufreq_register_driver(&cppc_cpufreq_driver);
+}
 
-       return ret;
+static inline void free_cpu_data(void)
+{
+       struct cppc_cpudata *iter, *tmp;
 
-out:
-       for_each_possible_cpu(i) {
-               cpu_data = all_cpu_data[i];
-               if (!cpu_data)
-                       break;
-               free_cpumask_var(cpu_data->shared_cpu_map);
-               kfree(cpu_data);
+       list_for_each_entry_safe(iter, tmp, &cpu_data_list, node) {
+               free_cpumask_var(iter->shared_cpu_map);
+               list_del(&iter->node);
+               kfree(iter);
        }
 
-       kfree(all_cpu_data);
-       return -ENODEV;
 }
 
 static void __exit cppc_cpufreq_exit(void)
 {
-       struct cppc_cpudata *cpu_data;
-       int i;
-
        cpufreq_unregister_driver(&cppc_cpufreq_driver);
 
-       for_each_possible_cpu(i) {
-               cpu_data = all_cpu_data[i];
-               free_cpumask_var(cpu_data->shared_cpu_map);
-               kfree(cpu_data);
-       }
-
-       kfree(all_cpu_data);
+       free_cpu_data();
 }
 
 module_exit(cppc_cpufreq_exit);
index c17aa29..d0a3525 100644 (file)
@@ -2097,6 +2097,46 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
 
+/**
+ * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
+ * @cpu: Target CPU.
+ * @min_perf: Minimum (required) performance level (units of @capacity).
+ * @target_perf: Target (desired) performance level (units of @capacity).
+ * @capacity: Capacity of the target CPU.
+ *
+ * Carry out a fast performance level switch of @cpu without sleeping.
+ *
+ * The driver's ->adjust_perf() callback invoked by this function must be
+ * suitable for being called from within RCU-sched read-side critical sections
+ * and it is expected to select a suitable performance level equal to or above
+ * @min_perf and preferably equal to or below @target_perf.
+ *
+ * This function must not be called if policy->fast_switch_enabled is unset.
+ *
+ * Governors calling this function must guarantee that it will never be invoked
+ * twice in parallel for the same CPU and that it will never be called in
+ * parallel with either ->target() or ->target_index() or ->fast_switch() for
+ * the same CPU.
+ */
+void cpufreq_driver_adjust_perf(unsigned int cpu,
+                                unsigned long min_perf,
+                                unsigned long target_perf,
+                                unsigned long capacity)
+{
+       cpufreq_driver->adjust_perf(cpu, min_perf, target_perf, capacity);
+}
+
+/**
+ * cpufreq_driver_has_adjust_perf - Check "direct fast switch" callback.
+ *
+ * Return 'true' if the ->adjust_perf callback is present for the
+ * current driver or 'false' otherwise.
+ */
+bool cpufreq_driver_has_adjust_perf(void)
+{
+       return !!cpufreq_driver->adjust_perf;
+}
+
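A minimal sketch of the intended caller pattern in a governor, assuming
min_perf/target_perf/capacity have already been computed (the function
name is hypothetical; schedutil-style governors follow this shape):

	static void governor_update_cpu(struct cpufreq_policy *policy,
					unsigned int cpu, unsigned long min_perf,
					unsigned long target_perf,
					unsigned long capacity)
	{
		/* The serialization rules documented above still apply here. */
		if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
			cpufreq_driver_adjust_perf(cpu, min_perf, target_perf,
						   capacity);
	}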
 /* Must set freqs->new to intermediate frequency */
 static int __target_intermediate(struct cpufreq_policy *policy,
                                 struct cpufreq_freqs *freqs, int index)
index 2a4db85..6e23376 100644 (file)
@@ -2207,9 +2207,9 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
                                            unsigned int policy_min,
                                            unsigned int policy_max)
 {
-       int max_freq = intel_pstate_get_max_freq(cpu);
        int32_t max_policy_perf, min_policy_perf;
        int max_state, turbo_max;
+       int max_freq;
 
        /*
         * HWP needs some special consideration, because on BDX the
@@ -2223,6 +2223,7 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
                        cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
                turbo_max = cpu->pstate.turbo_pstate;
        }
+       max_freq = max_state * cpu->pstate.scaling;
 
        max_policy_perf = max_state * policy_max / max_freq;
        if (policy_max == policy_min) {
@@ -2325,9 +2326,18 @@ static void intel_pstate_adjust_policy_max(struct cpudata *cpu,
 static void intel_pstate_verify_cpu_policy(struct cpudata *cpu,
                                           struct cpufreq_policy_data *policy)
 {
+       int max_freq;
+
        update_turbo_state();
-       cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-                                    intel_pstate_get_max_freq(cpu));
+       if (hwp_active) {
+               int max_state, turbo_max;
+
+               intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state);
+               max_freq = max_state * cpu->pstate.scaling;
+       } else {
+               max_freq = intel_pstate_get_max_freq(cpu);
+       }
+       cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, max_freq);
 
        intel_pstate_adjust_policy_max(cpu, policy);
 }
@@ -2526,20 +2536,19 @@ static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, in
                fp_toint(cpu->iowait_boost * 100));
 }
 
-static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 target_pstate,
-                                    bool strict, bool fast_switch)
+static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 min, u32 max,
+                                    u32 desired, bool fast_switch)
 {
        u64 prev = READ_ONCE(cpu->hwp_req_cached), value = prev;
 
        value &= ~HWP_MIN_PERF(~0L);
-       value |= HWP_MIN_PERF(target_pstate);
+       value |= HWP_MIN_PERF(min);
 
-       /*
-        * The entire MSR needs to be updated in order to update the HWP min
-        * field in it, so opportunistically update the max too if needed.
-        */
        value &= ~HWP_MAX_PERF(~0L);
-       value |= HWP_MAX_PERF(strict ? target_pstate : cpu->max_perf_ratio);
+       value |= HWP_MAX_PERF(max);
+
+       value &= ~HWP_DESIRED_PERF(~0L);
+       value |= HWP_DESIRED_PERF(desired);
 
        if (value == prev)
                return;
@@ -2569,11 +2578,15 @@ static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy,
        int old_pstate = cpu->pstate.current_pstate;
 
        target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
-       if (hwp_active)
-               intel_cpufreq_adjust_hwp(cpu, target_pstate,
-                                        policy->strict_target, fast_switch);
-       else if (target_pstate != old_pstate)
+       if (hwp_active) {
+               int max_pstate = policy->strict_target ?
+                                       target_pstate : cpu->max_perf_ratio;
+
+               intel_cpufreq_adjust_hwp(cpu, target_pstate, max_pstate, 0,
+                                        fast_switch);
+       } else if (target_pstate != old_pstate) {
                intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, fast_switch);
+       }
 
        cpu->pstate.current_pstate = target_pstate;
 
@@ -2634,6 +2647,47 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
        return target_pstate * cpu->pstate.scaling;
 }
 
+static void intel_cpufreq_adjust_perf(unsigned int cpunum,
+                                     unsigned long min_perf,
+                                     unsigned long target_perf,
+                                     unsigned long capacity)
+{
+       struct cpudata *cpu = all_cpu_data[cpunum];
+       int old_pstate = cpu->pstate.current_pstate;
+       int cap_pstate, min_pstate, max_pstate, target_pstate;
+
+       update_turbo_state();
+       cap_pstate = global.turbo_disabled ? cpu->pstate.max_pstate :
+                                            cpu->pstate.turbo_pstate;
+
+       /* Optimization: Avoid unnecessary divisions. */
+
+       target_pstate = cap_pstate;
+       if (target_perf < capacity)
+               target_pstate = DIV_ROUND_UP(cap_pstate * target_perf, capacity);
+
+       min_pstate = cap_pstate;
+       if (min_perf < capacity)
+               min_pstate = DIV_ROUND_UP(cap_pstate * min_perf, capacity);
+
+       if (min_pstate < cpu->pstate.min_pstate)
+               min_pstate = cpu->pstate.min_pstate;
+
+       if (min_pstate < cpu->min_perf_ratio)
+               min_pstate = cpu->min_perf_ratio;
+
+       max_pstate = min(cap_pstate, cpu->max_perf_ratio);
+       if (max_pstate < min_pstate)
+               max_pstate = min_pstate;
+
+       target_pstate = clamp_t(int, target_pstate, min_pstate, max_pstate);
+
+       intel_cpufreq_adjust_hwp(cpu, min_pstate, max_pstate, target_pstate, true);
+
+       cpu->pstate.current_pstate = target_pstate;
+       intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate);
+}
+
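To make the perf-to-pstate scaling concrete, a worked example with assumed
values (cap_pstate = 40, i.e. 4.0 GHz at 100 MHz per pstate, capacity = 1024):

	/*
	 * target_perf = 512 -> target_pstate = DIV_ROUND_UP(40 * 512, 1024) = 20
	 * min_perf    = 256 -> min_pstate    = DIV_ROUND_UP(40 * 256, 1024) = 10
	 * Both are then clamped against cpu->pstate.min_pstate and the
	 * min/max perf ratios before being written into the HWP request MSR.
	 */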
 static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
        int max_state, turbo_max, min_freq, max_freq, ret;
@@ -3032,6 +3086,8 @@ static int __init intel_pstate_init(void)
                        intel_pstate.attr = hwp_cpufreq_attrs;
                        intel_cpufreq.attr = hwp_cpufreq_attrs;
                        intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS;
+                       intel_cpufreq.fast_switch = NULL;
+                       intel_cpufreq.adjust_perf = intel_cpufreq_adjust_perf;
                        if (!default_driver)
                                default_driver = &intel_pstate;
 
index 0eb80c1..e63684d 100644 (file)
@@ -1166,9 +1166,6 @@ EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
 int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
                 unsigned long pgoff)
 {
-       struct file *oldfile;
-       int ret;
-
        if (WARN_ON(!dmabuf || !vma))
                return -EINVAL;
 
@@ -1186,22 +1183,10 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
                return -EINVAL;
 
        /* readjust the vma */
-       get_file(dmabuf->file);
-       oldfile = vma->vm_file;
-       vma->vm_file = dmabuf->file;
+       vma_set_file(vma, dmabuf->file);
        vma->vm_pgoff = pgoff;
 
-       ret = dmabuf->ops->mmap(dmabuf, vma);
-       if (ret) {
-               /* restore old parameters on failure */
-               vma->vm_file = oldfile;
-               fput(dmabuf->file);
-       } else {
-               if (oldfile)
-                       fput(oldfile);
-       }
-       return ret;
-
+       return dmabuf->ops->mmap(dmabuf, vma);
 }
 EXPORT_SYMBOL_GPL(dma_buf_mmap);
 
index bb5a42b..6ddbeb5 100644 (file)
@@ -200,7 +200,7 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences)
                        max = max(old->shared_count + num_fences,
                                  old->shared_max * 2);
        } else {
-               max = 4;
+               max = max(4ul, roundup_pow_of_two(num_fences));
        }
 
        new = dma_resv_list_alloc(max);
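For example, a first call with num_fences = 5 now sizes the initial list at
max(4, roundup_pow_of_two(5)) = 8 slots, instead of the previous fixed 4
that could be immediately outgrown.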
index 6e54cde..9744677 100644 (file)
@@ -1,4 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y                                  += heap-helpers.o
 obj-$(CONFIG_DMABUF_HEAPS_SYSTEM)      += system_heap.o
 obj-$(CONFIG_DMABUF_HEAPS_CMA)         += cma_heap.o
index e55384d..5e7c343 100644 (file)
 /*
  * DMABUF CMA heap exporter
  *
- * Copyright (C) 2012, 2019 Linaro Ltd.
+ * Copyright (C) 2012, 2019, 2020 Linaro Ltd.
  * Author: <benjamin.gaignard@linaro.org> for ST-Ericsson.
+ *
+ * Also utilizing parts of Andrew Davis' SRAM heap:
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/
+ *     Andrew F. Davis <afd@ti.com>
  */
-
 #include <linux/cma.h>
-#include <linux/device.h>
 #include <linux/dma-buf.h>
 #include <linux/dma-heap.h>
 #include <linux/dma-map-ops.h>
 #include <linux/err.h>
-#include <linux/errno.h>
 #include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/slab.h>
 #include <linux/scatterlist.h>
-#include <linux/sched/signal.h>
+#include <linux/slab.h>
 
-#include "heap-helpers.h"
 
 struct cma_heap {
        struct dma_heap *heap;
        struct cma *cma;
 };
 
-static void cma_heap_free(struct heap_helper_buffer *buffer)
+struct cma_heap_buffer {
+       struct cma_heap *heap;
+       struct list_head attachments;
+       struct mutex lock;
+       unsigned long len;
+       struct page *cma_pages;
+       struct page **pages;
+       pgoff_t pagecount;
+       int vmap_cnt;
+       void *vaddr;
+};
+
+struct dma_heap_attachment {
+       struct device *dev;
+       struct sg_table table;
+       struct list_head list;
+       bool mapped;
+};
+
+static int cma_heap_attach(struct dma_buf *dmabuf,
+                          struct dma_buf_attachment *attachment)
 {
-       struct cma_heap *cma_heap = dma_heap_get_drvdata(buffer->heap);
-       unsigned long nr_pages = buffer->pagecount;
-       struct page *cma_pages = buffer->priv_virt;
+       struct cma_heap_buffer *buffer = dmabuf->priv;
+       struct dma_heap_attachment *a;
+       int ret;
 
-       /* free page list */
-       kfree(buffer->pages);
-       /* release memory */
-       cma_release(cma_heap->cma, cma_pages, nr_pages);
+       a = kzalloc(sizeof(*a), GFP_KERNEL);
+       if (!a)
+               return -ENOMEM;
+
+       ret = sg_alloc_table_from_pages(&a->table, buffer->pages,
+                                       buffer->pagecount, 0,
+                                       buffer->pagecount << PAGE_SHIFT,
+                                       GFP_KERNEL);
+       if (ret) {
+               kfree(a);
+               return ret;
+       }
+
+       a->dev = attachment->dev;
+       INIT_LIST_HEAD(&a->list);
+       a->mapped = false;
+
+       attachment->priv = a;
+
+       mutex_lock(&buffer->lock);
+       list_add(&a->list, &buffer->attachments);
+       mutex_unlock(&buffer->lock);
+
+       return 0;
+}
+
+static void cma_heap_detach(struct dma_buf *dmabuf,
+                           struct dma_buf_attachment *attachment)
+{
+       struct cma_heap_buffer *buffer = dmabuf->priv;
+       struct dma_heap_attachment *a = attachment->priv;
+
+       mutex_lock(&buffer->lock);
+       list_del(&a->list);
+       mutex_unlock(&buffer->lock);
+
+       sg_free_table(&a->table);
+       kfree(a);
+}
+
+static struct sg_table *cma_heap_map_dma_buf(struct dma_buf_attachment *attachment,
+                                            enum dma_data_direction direction)
+{
+       struct dma_heap_attachment *a = attachment->priv;
+       struct sg_table *table = &a->table;
+       int ret;
+
+       ret = dma_map_sgtable(attachment->dev, table, direction, 0);
+       if (ret)
+               return ERR_PTR(-ENOMEM);
+       a->mapped = true;
+       return table;
+}
+
+static void cma_heap_unmap_dma_buf(struct dma_buf_attachment *attachment,
+                                  struct sg_table *table,
+                                  enum dma_data_direction direction)
+{
+       struct dma_heap_attachment *a = attachment->priv;
+
+       a->mapped = false;
+       dma_unmap_sgtable(attachment->dev, table, direction, 0);
+}
+
+static int cma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+                                            enum dma_data_direction direction)
+{
+       struct cma_heap_buffer *buffer = dmabuf->priv;
+       struct dma_heap_attachment *a;
+
+       if (buffer->vmap_cnt)
+               invalidate_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+       mutex_lock(&buffer->lock);
+       list_for_each_entry(a, &buffer->attachments, list) {
+               if (!a->mapped)
+                       continue;
+               dma_sync_sgtable_for_cpu(a->dev, &a->table, direction);
+       }
+       mutex_unlock(&buffer->lock);
+
+       return 0;
+}
+
+static int cma_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+                                          enum dma_data_direction direction)
+{
+       struct cma_heap_buffer *buffer = dmabuf->priv;
+       struct dma_heap_attachment *a;
+
+       if (buffer->vmap_cnt)
+               flush_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+       mutex_lock(&buffer->lock);
+       list_for_each_entry(a, &buffer->attachments, list) {
+               if (!a->mapped)
+                       continue;
+               dma_sync_sgtable_for_device(a->dev, &a->table, direction);
+       }
+       mutex_unlock(&buffer->lock);
+
+       return 0;
+}
+
+static vm_fault_t cma_heap_vm_fault(struct vm_fault *vmf)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       struct cma_heap_buffer *buffer = vma->vm_private_data;
+
+       if (vmf->pgoff >= buffer->pagecount)
+               return VM_FAULT_SIGBUS;
+
+       vmf->page = buffer->pages[vmf->pgoff];
+       get_page(vmf->page);
+
+       return 0;
+}
+
+static const struct vm_operations_struct dma_heap_vm_ops = {
+       .fault = cma_heap_vm_fault,
+};
+
+static int cma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+       struct cma_heap_buffer *buffer = dmabuf->priv;
+
+       if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
+               return -EINVAL;
+
+       vma->vm_ops = &dma_heap_vm_ops;
+       vma->vm_private_data = buffer;
+
+       return 0;
+}
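A minimal userspace sketch of the path that reaches this handler, assuming
a heap node named /dev/dma_heap/reserved (the node name depends on the
platform's CMA region; error handling elided):

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <linux/dma-heap.h>

	int heap_fd = open("/dev/dma_heap/reserved", O_RDWR);
	struct dma_heap_allocation_data alloc = {
		.len = 4096,
		.fd_flags = O_RDWR | O_CLOEXEC,
	};
	ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc);	/* buffer fd in alloc.fd */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
		       alloc.fd, 0);	/* pages fault in via cma_heap_vm_fault() */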
+
+static void *cma_heap_do_vmap(struct cma_heap_buffer *buffer)
+{
+       void *vaddr;
+
+       vaddr = vmap(buffer->pages, buffer->pagecount, VM_MAP, PAGE_KERNEL);
+       if (!vaddr)
+               return ERR_PTR(-ENOMEM);
+
+       return vaddr;
+}
+
+static int cma_heap_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
+{
+       struct cma_heap_buffer *buffer = dmabuf->priv;
+       void *vaddr;
+       int ret = 0;
+
+       mutex_lock(&buffer->lock);
+       if (buffer->vmap_cnt) {
+               buffer->vmap_cnt++;
+               dma_buf_map_set_vaddr(map, buffer->vaddr);
+               goto out;
+       }
+
+       vaddr = cma_heap_do_vmap(buffer);
+       if (IS_ERR(vaddr)) {
+               ret = PTR_ERR(vaddr);
+               goto out;
+       }
+       buffer->vaddr = vaddr;
+       buffer->vmap_cnt++;
+       dma_buf_map_set_vaddr(map, buffer->vaddr);
+out:
+       mutex_unlock(&buffer->lock);
+
+       return ret;
+}
+
+static void cma_heap_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
+{
+       struct cma_heap_buffer *buffer = dmabuf->priv;
+
+       mutex_lock(&buffer->lock);
+       if (!--buffer->vmap_cnt) {
+               vunmap(buffer->vaddr);
+               buffer->vaddr = NULL;
+       }
+       mutex_unlock(&buffer->lock);
+       dma_buf_map_clear(map);
+}
+
+static void cma_heap_dma_buf_release(struct dma_buf *dmabuf)
+{
+       struct cma_heap_buffer *buffer = dmabuf->priv;
+       struct cma_heap *cma_heap = buffer->heap;
+
+       if (buffer->vmap_cnt > 0) {
+               WARN(1, "%s: buffer still mapped in the kernel\n", __func__);
+               vunmap(buffer->vaddr);
+               buffer->vaddr = NULL;
+       }
+
+       cma_release(cma_heap->cma, buffer->cma_pages, buffer->pagecount);
        kfree(buffer);
 }
 
-/* dmabuf heap CMA operations functions */
+static const struct dma_buf_ops cma_heap_buf_ops = {
+       .attach = cma_heap_attach,
+       .detach = cma_heap_detach,
+       .map_dma_buf = cma_heap_map_dma_buf,
+       .unmap_dma_buf = cma_heap_unmap_dma_buf,
+       .begin_cpu_access = cma_heap_dma_buf_begin_cpu_access,
+       .end_cpu_access = cma_heap_dma_buf_end_cpu_access,
+       .mmap = cma_heap_mmap,
+       .vmap = cma_heap_vmap,
+       .vunmap = cma_heap_vunmap,
+       .release = cma_heap_dma_buf_release,
+};
+
 static int cma_heap_allocate(struct dma_heap *heap,
-                            unsigned long len,
-                            unsigned long fd_flags,
-                            unsigned long heap_flags)
+                                 unsigned long len,
+                                 unsigned long fd_flags,
+                                 unsigned long heap_flags)
 {
        struct cma_heap *cma_heap = dma_heap_get_drvdata(heap);
-       struct heap_helper_buffer *helper_buffer;
-       struct page *cma_pages;
+       struct cma_heap_buffer *buffer;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
        size_t size = PAGE_ALIGN(len);
-       unsigned long nr_pages = size >> PAGE_SHIFT;
+       pgoff_t pagecount = size >> PAGE_SHIFT;
        unsigned long align = get_order(size);
+       struct page *cma_pages;
        struct dma_buf *dmabuf;
        int ret = -ENOMEM;
        pgoff_t pg;
 
-       if (align > CONFIG_CMA_ALIGNMENT)
-               align = CONFIG_CMA_ALIGNMENT;
-
-       helper_buffer = kzalloc(sizeof(*helper_buffer), GFP_KERNEL);
-       if (!helper_buffer)
+       buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+       if (!buffer)
                return -ENOMEM;
 
-       init_heap_helper_buffer(helper_buffer, cma_heap_free);
-       helper_buffer->heap = heap;
-       helper_buffer->size = len;
+       INIT_LIST_HEAD(&buffer->attachments);
+       mutex_init(&buffer->lock);
+       buffer->len = size;
+
+       if (align > CONFIG_CMA_ALIGNMENT)
+               align = CONFIG_CMA_ALIGNMENT;
 
-       cma_pages = cma_alloc(cma_heap->cma, nr_pages, align, false);
+       cma_pages = cma_alloc(cma_heap->cma, pagecount, align, false);
        if (!cma_pages)
-               goto free_buf;
+               goto free_buffer;
 
+       /* Clear the cma pages */
        if (PageHighMem(cma_pages)) {
-               unsigned long nr_clear_pages = nr_pages;
+               unsigned long nr_clear_pages = pagecount;
                struct page *page = cma_pages;
 
                while (nr_clear_pages > 0) {
@@ -85,7 +314,6 @@ static int cma_heap_allocate(struct dma_heap *heap,
                         */
                        if (fatal_signal_pending(current))
                                goto free_cma;
-
                        page++;
                        nr_clear_pages--;
                }
@@ -93,28 +321,30 @@ static int cma_heap_allocate(struct dma_heap *heap,
                memset(page_address(cma_pages), 0, size);
        }
 
-       helper_buffer->pagecount = nr_pages;
-       helper_buffer->pages = kmalloc_array(helper_buffer->pagecount,
-                                            sizeof(*helper_buffer->pages),
-                                            GFP_KERNEL);
-       if (!helper_buffer->pages) {
+       buffer->pages = kmalloc_array(pagecount, sizeof(*buffer->pages), GFP_KERNEL);
+       if (!buffer->pages) {
                ret = -ENOMEM;
                goto free_cma;
        }
 
-       for (pg = 0; pg < helper_buffer->pagecount; pg++)
-               helper_buffer->pages[pg] = &cma_pages[pg];
+       for (pg = 0; pg < pagecount; pg++)
+               buffer->pages[pg] = &cma_pages[pg];
+
+       buffer->cma_pages = cma_pages;
+       buffer->heap = cma_heap;
+       buffer->pagecount = pagecount;
 
        /* create the dmabuf */
-       dmabuf = heap_helper_export_dmabuf(helper_buffer, fd_flags);
+       exp_info.ops = &cma_heap_buf_ops;
+       exp_info.size = buffer->len;
+       exp_info.flags = fd_flags;
+       exp_info.priv = buffer;
+       dmabuf = dma_buf_export(&exp_info);
        if (IS_ERR(dmabuf)) {
                ret = PTR_ERR(dmabuf);
                goto free_pages;
        }
 
-       helper_buffer->dmabuf = dmabuf;
-       helper_buffer->priv_virt = cma_pages;
-
        ret = dma_buf_fd(dmabuf, fd_flags);
        if (ret < 0) {
                dma_buf_put(dmabuf);
@@ -125,11 +355,12 @@ static int cma_heap_allocate(struct dma_heap *heap,
        return ret;
 
 free_pages:
-       kfree(helper_buffer->pages);
+       kfree(buffer->pages);
 free_cma:
-       cma_release(cma_heap->cma, cma_pages, nr_pages);
-free_buf:
-       kfree(helper_buffer);
+       cma_release(cma_heap->cma, cma_pages, pagecount);
+free_buffer:
+       kfree(buffer);
+
        return ret;
 }
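(A hedged userspace sketch of how this allocator is reached: open a heap
node under /dev/dma_heap, issue DMA_HEAP_IOCTL_ALLOC, then mmap() the
returned dma-buf fd, which is served by cma_heap_mmap() above. The heap
name "reserved" is an assumption; the default CMA area's name varies by
platform.)

  #include <fcntl.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <unistd.h>
  #include <linux/dma-heap.h>

  int main(void)
  {
          struct dma_heap_allocation_data alloc = {
                  .len = 1024 * 1024,
                  .fd_flags = O_RDWR | O_CLOEXEC,
          };
          int heap = open("/dev/dma_heap/reserved", O_RDONLY | O_CLOEXEC);
          void *p;

          if (heap < 0 || ioctl(heap, DMA_HEAP_IOCTL_ALLOC, &alloc) < 0)
                  return 1;
          /* alloc.fd is a dma-buf fd; this mapping faults through the
           * vm_ops installed in cma_heap_mmap(). */
          p = mmap(NULL, alloc.len, PROT_READ | PROT_WRITE, MAP_SHARED,
                   alloc.fd, 0);
          if (p == MAP_FAILED)
                  return 1;
          memset(p, 0, alloc.len);
          munmap(p, alloc.len);
          close(alloc.fd);
          close(heap);
          return 0;
  }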
 
diff --git a/drivers/dma-buf/heaps/heap-helpers.c b/drivers/dma-buf/heaps/heap-helpers.c
deleted file mode 100644 (file)
index fcf4ce3..0000000
+++ /dev/null
@@ -1,274 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/device.h>
-#include <linux/dma-buf.h>
-#include <linux/err.h>
-#include <linux/highmem.h>
-#include <linux/idr.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <uapi/linux/dma-heap.h>
-
-#include "heap-helpers.h"
-
-void init_heap_helper_buffer(struct heap_helper_buffer *buffer,
-                            void (*free)(struct heap_helper_buffer *))
-{
-       buffer->priv_virt = NULL;
-       mutex_init(&buffer->lock);
-       buffer->vmap_cnt = 0;
-       buffer->vaddr = NULL;
-       buffer->pagecount = 0;
-       buffer->pages = NULL;
-       INIT_LIST_HEAD(&buffer->attachments);
-       buffer->free = free;
-}
-
-struct dma_buf *heap_helper_export_dmabuf(struct heap_helper_buffer *buffer,
-                                         int fd_flags)
-{
-       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
-
-       exp_info.ops = &heap_helper_ops;
-       exp_info.size = buffer->size;
-       exp_info.flags = fd_flags;
-       exp_info.priv = buffer;
-
-       return dma_buf_export(&exp_info);
-}
-
-static void *dma_heap_map_kernel(struct heap_helper_buffer *buffer)
-{
-       void *vaddr;
-
-       vaddr = vmap(buffer->pages, buffer->pagecount, VM_MAP, PAGE_KERNEL);
-       if (!vaddr)
-               return ERR_PTR(-ENOMEM);
-
-       return vaddr;
-}
-
-static void dma_heap_buffer_destroy(struct heap_helper_buffer *buffer)
-{
-       if (buffer->vmap_cnt > 0) {
-               WARN(1, "%s: buffer still mapped in the kernel\n", __func__);
-               vunmap(buffer->vaddr);
-       }
-
-       buffer->free(buffer);
-}
-
-static void *dma_heap_buffer_vmap_get(struct heap_helper_buffer *buffer)
-{
-       void *vaddr;
-
-       if (buffer->vmap_cnt) {
-               buffer->vmap_cnt++;
-               return buffer->vaddr;
-       }
-       vaddr = dma_heap_map_kernel(buffer);
-       if (IS_ERR(vaddr))
-               return vaddr;
-       buffer->vaddr = vaddr;
-       buffer->vmap_cnt++;
-       return vaddr;
-}
-
-static void dma_heap_buffer_vmap_put(struct heap_helper_buffer *buffer)
-{
-       if (!--buffer->vmap_cnt) {
-               vunmap(buffer->vaddr);
-               buffer->vaddr = NULL;
-       }
-}
-
-struct dma_heaps_attachment {
-       struct device *dev;
-       struct sg_table table;
-       struct list_head list;
-};
-
-static int dma_heap_attach(struct dma_buf *dmabuf,
-                          struct dma_buf_attachment *attachment)
-{
-       struct dma_heaps_attachment *a;
-       struct heap_helper_buffer *buffer = dmabuf->priv;
-       int ret;
-
-       a = kzalloc(sizeof(*a), GFP_KERNEL);
-       if (!a)
-               return -ENOMEM;
-
-       ret = sg_alloc_table_from_pages(&a->table, buffer->pages,
-                                       buffer->pagecount, 0,
-                                       buffer->pagecount << PAGE_SHIFT,
-                                       GFP_KERNEL);
-       if (ret) {
-               kfree(a);
-               return ret;
-       }
-
-       a->dev = attachment->dev;
-       INIT_LIST_HEAD(&a->list);
-
-       attachment->priv = a;
-
-       mutex_lock(&buffer->lock);
-       list_add(&a->list, &buffer->attachments);
-       mutex_unlock(&buffer->lock);
-
-       return 0;
-}
-
-static void dma_heap_detach(struct dma_buf *dmabuf,
-                           struct dma_buf_attachment *attachment)
-{
-       struct dma_heaps_attachment *a = attachment->priv;
-       struct heap_helper_buffer *buffer = dmabuf->priv;
-
-       mutex_lock(&buffer->lock);
-       list_del(&a->list);
-       mutex_unlock(&buffer->lock);
-
-       sg_free_table(&a->table);
-       kfree(a);
-}
-
-static
-struct sg_table *dma_heap_map_dma_buf(struct dma_buf_attachment *attachment,
-                                     enum dma_data_direction direction)
-{
-       struct dma_heaps_attachment *a = attachment->priv;
-       struct sg_table *table = &a->table;
-       int ret;
-
-       ret = dma_map_sgtable(attachment->dev, table, direction, 0);
-       if (ret)
-               table = ERR_PTR(ret);
-       return table;
-}
-
-static void dma_heap_unmap_dma_buf(struct dma_buf_attachment *attachment,
-                                  struct sg_table *table,
-                                  enum dma_data_direction direction)
-{
-       dma_unmap_sgtable(attachment->dev, table, direction, 0);
-}
-
-static vm_fault_t dma_heap_vm_fault(struct vm_fault *vmf)
-{
-       struct vm_area_struct *vma = vmf->vma;
-       struct heap_helper_buffer *buffer = vma->vm_private_data;
-
-       if (vmf->pgoff > buffer->pagecount)
-               return VM_FAULT_SIGBUS;
-
-       vmf->page = buffer->pages[vmf->pgoff];
-       get_page(vmf->page);
-
-       return 0;
-}
-
-static const struct vm_operations_struct dma_heap_vm_ops = {
-       .fault = dma_heap_vm_fault,
-};
-
-static int dma_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
-{
-       struct heap_helper_buffer *buffer = dmabuf->priv;
-
-       if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
-               return -EINVAL;
-
-       vma->vm_ops = &dma_heap_vm_ops;
-       vma->vm_private_data = buffer;
-
-       return 0;
-}
-
-static void dma_heap_dma_buf_release(struct dma_buf *dmabuf)
-{
-       struct heap_helper_buffer *buffer = dmabuf->priv;
-
-       dma_heap_buffer_destroy(buffer);
-}
-
-static int dma_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
-                                            enum dma_data_direction direction)
-{
-       struct heap_helper_buffer *buffer = dmabuf->priv;
-       struct dma_heaps_attachment *a;
-       int ret = 0;
-
-       mutex_lock(&buffer->lock);
-
-       if (buffer->vmap_cnt)
-               invalidate_kernel_vmap_range(buffer->vaddr, buffer->size);
-
-       list_for_each_entry(a, &buffer->attachments, list) {
-               dma_sync_sg_for_cpu(a->dev, a->table.sgl, a->table.nents,
-                                   direction);
-       }
-       mutex_unlock(&buffer->lock);
-
-       return ret;
-}
-
-static int dma_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
-                                          enum dma_data_direction direction)
-{
-       struct heap_helper_buffer *buffer = dmabuf->priv;
-       struct dma_heaps_attachment *a;
-
-       mutex_lock(&buffer->lock);
-
-       if (buffer->vmap_cnt)
-               flush_kernel_vmap_range(buffer->vaddr, buffer->size);
-
-       list_for_each_entry(a, &buffer->attachments, list) {
-               dma_sync_sg_for_device(a->dev, a->table.sgl, a->table.nents,
-                                      direction);
-       }
-       mutex_unlock(&buffer->lock);
-
-       return 0;
-}
-
-static int dma_heap_dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
-{
-       struct heap_helper_buffer *buffer = dmabuf->priv;
-       void *vaddr;
-
-       mutex_lock(&buffer->lock);
-       vaddr = dma_heap_buffer_vmap_get(buffer);
-       mutex_unlock(&buffer->lock);
-
-       if (!vaddr)
-               return -ENOMEM;
-       dma_buf_map_set_vaddr(map, vaddr);
-
-       return 0;
-}
-
-static void dma_heap_dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
-{
-       struct heap_helper_buffer *buffer = dmabuf->priv;
-
-       mutex_lock(&buffer->lock);
-       dma_heap_buffer_vmap_put(buffer);
-       mutex_unlock(&buffer->lock);
-}
-
-const struct dma_buf_ops heap_helper_ops = {
-       .map_dma_buf = dma_heap_map_dma_buf,
-       .unmap_dma_buf = dma_heap_unmap_dma_buf,
-       .mmap = dma_heap_mmap,
-       .release = dma_heap_dma_buf_release,
-       .attach = dma_heap_attach,
-       .detach = dma_heap_detach,
-       .begin_cpu_access = dma_heap_dma_buf_begin_cpu_access,
-       .end_cpu_access = dma_heap_dma_buf_end_cpu_access,
-       .vmap = dma_heap_dma_buf_vmap,
-       .vunmap = dma_heap_dma_buf_vunmap,
-};
diff --git a/drivers/dma-buf/heaps/heap-helpers.h b/drivers/dma-buf/heaps/heap-helpers.h
deleted file mode 100644 (file)
index 805d2df..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * DMABUF Heaps helper code
- *
- * Copyright (C) 2011 Google, Inc.
- * Copyright (C) 2019 Linaro Ltd.
- */
-
-#ifndef _HEAP_HELPERS_H
-#define _HEAP_HELPERS_H
-
-#include <linux/dma-heap.h>
-#include <linux/list.h>
-
-/**
- * struct heap_helper_buffer - helper buffer metadata
- * @heap:              back pointer to the heap the buffer came from
- * @dmabuf:            backing dma-buf for this buffer
- * @size:              size of the buffer
- * @priv_virt          pointer to heap specific private value
- * @lock               mutext to protect the data in this structure
- * @vmap_cnt           count of vmap references on the buffer
- * @vaddr              vmap'ed virtual address
- * @pagecount          number of pages in the buffer
- * @pages              list of page pointers
- * @attachments                list of device attachments
- *
- * @free               heap callback to free the buffer
- */
-struct heap_helper_buffer {
-       struct dma_heap *heap;
-       struct dma_buf *dmabuf;
-       size_t size;
-
-       void *priv_virt;
-       struct mutex lock;
-       int vmap_cnt;
-       void *vaddr;
-       pgoff_t pagecount;
-       struct page **pages;
-       struct list_head attachments;
-
-       void (*free)(struct heap_helper_buffer *buffer);
-};
-
-void init_heap_helper_buffer(struct heap_helper_buffer *buffer,
-                            void (*free)(struct heap_helper_buffer *));
-
-struct dma_buf *heap_helper_export_dmabuf(struct heap_helper_buffer *buffer,
-                                         int fd_flags);
-
-extern const struct dma_buf_ops heap_helper_ops;
-#endif /* _HEAP_HELPERS_H */
index 0bf688e..17e0e9a 100644 (file)
@@ -3,7 +3,11 @@
  * DMABUF System heap exporter
  *
  * Copyright (C) 2011 Google, Inc.
- * Copyright (C) 2019 Linaro Ltd.
+ * Copyright (C) 2019, 2020 Linaro Ltd.
+ *
+ * Portions based off of Andrew Davis' SRAM heap:
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/
+ *     Andrew F. Davis <afd@ti.com>
  */
 
 #include <linux/dma-buf.h>
 #include <linux/module.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
-#include <linux/sched/signal.h>
-#include <asm/page.h>
+#include <linux/vmalloc.h>
+
+static struct dma_heap *sys_heap;
+
+struct system_heap_buffer {
+       struct dma_heap *heap;
+       struct list_head attachments;
+       struct mutex lock;
+       unsigned long len;
+       struct sg_table sg_table;
+       int vmap_cnt;
+       void *vaddr;
+};
+
+struct dma_heap_attachment {
+       struct device *dev;
+       struct sg_table *table;
+       struct list_head list;
+       bool mapped;
+};
+
+#define HIGH_ORDER_GFP  (((GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN \
+                               | __GFP_NORETRY) & ~__GFP_RECLAIM) \
+                               | __GFP_COMP)
+#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO | __GFP_COMP)
+static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP, LOW_ORDER_GFP};
+/*
+ * The selection of the orders used for allocation (1MB, 64K, 4K) is designed
+ * to match the sizes often found in IOMMUs. Using order 4 pages instead
+ * of order 0 pages can significantly improve the performance of many IOMMUs
+ * by reducing TLB pressure and time spent updating page tables.
+ */
+static const unsigned int orders[] = {8, 4, 0};
+#define NUM_ORDERS ARRAY_SIZE(orders)
+
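(Illustrative arithmetic for the orders chosen above, assuming a 4 KiB
PAGE_SIZE; this is a standalone host-side check, not kernel code.)

  #include <assert.h>

  #define PAGE_SZ 4096UL

  static_assert((PAGE_SZ << 8) == 1024 * 1024, "order 8 == 1 MiB");
  static_assert((PAGE_SZ << 4) == 64 * 1024,   "order 4 == 64 KiB");
  static_assert((PAGE_SZ << 0) == 4 * 1024,    "order 0 == 4 KiB");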
+static struct sg_table *dup_sg_table(struct sg_table *table)
+{
+       struct sg_table *new_table;
+       int ret, i;
+       struct scatterlist *sg, *new_sg;
+
+       new_table = kzalloc(sizeof(*new_table), GFP_KERNEL);
+       if (!new_table)
+               return ERR_PTR(-ENOMEM);
+
+       ret = sg_alloc_table(new_table, table->orig_nents, GFP_KERNEL);
+       if (ret) {
+               kfree(new_table);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       new_sg = new_table->sgl;
+       for_each_sgtable_sg(table, sg, i) {
+               sg_set_page(new_sg, sg_page(sg), sg->length, sg->offset);
+               new_sg = sg_next(new_sg);
+       }
+
+       return new_table;
+}
+
+static int system_heap_attach(struct dma_buf *dmabuf,
+                             struct dma_buf_attachment *attachment)
+{
+       struct system_heap_buffer *buffer = dmabuf->priv;
+       struct dma_heap_attachment *a;
+       struct sg_table *table;
+
+       a = kzalloc(sizeof(*a), GFP_KERNEL);
+       if (!a)
+               return -ENOMEM;
+
+       table = dup_sg_table(&buffer->sg_table);
+       if (IS_ERR(table)) {
+               kfree(a);
+               return -ENOMEM;
+       }
+
+       a->table = table;
+       a->dev = attachment->dev;
+       INIT_LIST_HEAD(&a->list);
+       a->mapped = false;
+
+       attachment->priv = a;
+
+       mutex_lock(&buffer->lock);
+       list_add(&a->list, &buffer->attachments);
+       mutex_unlock(&buffer->lock);
+
+       return 0;
+}
+
+static void system_heap_detach(struct dma_buf *dmabuf,
+                              struct dma_buf_attachment *attachment)
+{
+       struct system_heap_buffer *buffer = dmabuf->priv;
+       struct dma_heap_attachment *a = attachment->priv;
+
+       mutex_lock(&buffer->lock);
+       list_del(&a->list);
+       mutex_unlock(&buffer->lock);
+
+       sg_free_table(a->table);
+       kfree(a->table);
+       kfree(a);
+}
+
+static struct sg_table *system_heap_map_dma_buf(struct dma_buf_attachment *attachment,
+                                               enum dma_data_direction direction)
+{
+       struct dma_heap_attachment *a = attachment->priv;
+       struct sg_table *table = a->table;
+       int ret;
+
+       ret = dma_map_sgtable(attachment->dev, table, direction, 0);
+       if (ret)
+               return ERR_PTR(ret);
+
+       a->mapped = true;
+       return table;
+}
+
+static void system_heap_unmap_dma_buf(struct dma_buf_attachment *attachment,
+                                     struct sg_table *table,
+                                     enum dma_data_direction direction)
+{
+       struct dma_heap_attachment *a = attachment->priv;
+
+       a->mapped = false;
+       dma_unmap_sgtable(attachment->dev, table, direction, 0);
+}
+
+static int system_heap_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+                                               enum dma_data_direction direction)
+{
+       struct system_heap_buffer *buffer = dmabuf->priv;
+       struct dma_heap_attachment *a;
+
+       mutex_lock(&buffer->lock);
+
+       if (buffer->vmap_cnt)
+               invalidate_kernel_vmap_range(buffer->vaddr, buffer->len);
+
+       list_for_each_entry(a, &buffer->attachments, list) {
+               if (!a->mapped)
+                       continue;
+               dma_sync_sgtable_for_cpu(a->dev, a->table, direction);
+       }
+       mutex_unlock(&buffer->lock);
+
+       return 0;
+}
+
+static int system_heap_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+                                             enum dma_data_direction direction)
+{
+       struct system_heap_buffer *buffer = dmabuf->priv;
+       struct dma_heap_attachment *a;
+
+       mutex_lock(&buffer->lock);
+
+       if (buffer->vmap_cnt)
+               flush_kernel_vmap_range(buffer->vaddr, buffer->len);
 
-#include "heap-helpers.h"
+       list_for_each_entry(a, &buffer->attachments, list) {
+               if (!a->mapped)
+                       continue;
+               dma_sync_sgtable_for_device(a->dev, a->table, direction);
+       }
+       mutex_unlock(&buffer->lock);
+
+       return 0;
+}
+
+static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+       struct system_heap_buffer *buffer = dmabuf->priv;
+       struct sg_table *table = &buffer->sg_table;
+       unsigned long addr = vma->vm_start;
+       struct sg_page_iter piter;
+       int ret;
+
+       for_each_sgtable_page(table, &piter, vma->vm_pgoff) {
+               struct page *page = sg_page_iter_page(&piter);
+
+               ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE,
+                                     vma->vm_page_prot);
+               if (ret)
+                       return ret;
+               addr += PAGE_SIZE;
+               if (addr >= vma->vm_end)
+                       return 0;
+       }
+       return 0;
+}
+
+static void *system_heap_do_vmap(struct system_heap_buffer *buffer)
+{
+       struct sg_table *table = &buffer->sg_table;
+       int npages = PAGE_ALIGN(buffer->len) / PAGE_SIZE;
+       struct page **pages = vmalloc(sizeof(struct page *) * npages);
+       struct page **tmp = pages;
+       struct sg_page_iter piter;
+       void *vaddr;
+
+       if (!pages)
+               return ERR_PTR(-ENOMEM);
+
+       for_each_sgtable_page(table, &piter, 0) {
+               WARN_ON(tmp - pages >= npages);
+               *tmp++ = sg_page_iter_page(&piter);
+       }
+
+       vaddr = vmap(pages, npages, VM_MAP, PAGE_KERNEL);
+       vfree(pages);
+
+       if (!vaddr)
+               return ERR_PTR(-ENOMEM);
+
+       return vaddr;
+}
+
+static int system_heap_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
+{
+       struct system_heap_buffer *buffer = dmabuf->priv;
+       void *vaddr;
+       int ret = 0;
+
+       mutex_lock(&buffer->lock);
+       if (buffer->vmap_cnt) {
+               buffer->vmap_cnt++;
+               dma_buf_map_set_vaddr(map, buffer->vaddr);
+               goto out;
+       }
+
+       vaddr = system_heap_do_vmap(buffer);
+       if (IS_ERR(vaddr)) {
+               ret = PTR_ERR(vaddr);
+               goto out;
+       }
+
+       buffer->vaddr = vaddr;
+       buffer->vmap_cnt++;
+       dma_buf_map_set_vaddr(map, buffer->vaddr);
+out:
+       mutex_unlock(&buffer->lock);
+
+       return ret;
+}
 
-struct dma_heap *sys_heap;
+static void system_heap_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map)
+{
+       struct system_heap_buffer *buffer = dmabuf->priv;
 
-static void system_heap_free(struct heap_helper_buffer *buffer)
+       mutex_lock(&buffer->lock);
+       if (!--buffer->vmap_cnt) {
+               vunmap(buffer->vaddr);
+               buffer->vaddr = NULL;
+       }
+       mutex_unlock(&buffer->lock);
+       dma_buf_map_clear(map);
+}
+
+static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
 {
-       pgoff_t pg;
+       struct system_heap_buffer *buffer = dmabuf->priv;
+       struct sg_table *table;
+       struct scatterlist *sg;
+       int i;
+
+       table = &buffer->sg_table;
+       for_each_sg(table->sgl, sg, table->nents, i) {
+               struct page *page = sg_page(sg);
 
-       for (pg = 0; pg < buffer->pagecount; pg++)
-               __free_page(buffer->pages[pg]);
-       kfree(buffer->pages);
+               __free_pages(page, compound_order(page));
+       }
+       sg_free_table(table);
        kfree(buffer);
 }
 
+static const struct dma_buf_ops system_heap_buf_ops = {
+       .attach = system_heap_attach,
+       .detach = system_heap_detach,
+       .map_dma_buf = system_heap_map_dma_buf,
+       .unmap_dma_buf = system_heap_unmap_dma_buf,
+       .begin_cpu_access = system_heap_dma_buf_begin_cpu_access,
+       .end_cpu_access = system_heap_dma_buf_end_cpu_access,
+       .mmap = system_heap_mmap,
+       .vmap = system_heap_vmap,
+       .vunmap = system_heap_vunmap,
+       .release = system_heap_dma_buf_release,
+};
+
+static struct page *alloc_largest_available(unsigned long size,
+                                           unsigned int max_order)
+{
+       struct page *page;
+       int i;
+
+       for (i = 0; i < NUM_ORDERS; i++) {
+               if (size < (PAGE_SIZE << orders[i]))
+                       continue;
+               if (max_order < orders[i])
+                       continue;
+
+               page = alloc_pages(order_flags[i], orders[i]);
+               if (!page)
+                       continue;
+               return page;
+       }
+       return NULL;
+}
+
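(A hedged host-side simulation of the policy above: take the largest
chunk that still fits, and clamp max_order so later chunks never exceed
earlier ones. Assumes 4 KiB pages; the request size is illustrative.)

  #include <stdio.h>

  int main(void)
  {
          const unsigned int orders[] = {8, 4, 0};
          unsigned long page = 4096, remaining = (1024 + 68) * 1024;
          unsigned int max_order = orders[0];

          while (remaining > 0) {
                  for (int i = 0; i < 3; i++) {
                          if (remaining < (page << orders[i]) ||
                              max_order < orders[i])
                                  continue;
                          printf("order %u chunk (%lu KiB)\n", orders[i],
                                 (page << orders[i]) >> 10);
                          remaining -= page << orders[i];
                          max_order = orders[i];
                          break;
                  }
          }
          return 0;       /* prints: order 8, then order 4, then order 0 */
  }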
 static int system_heap_allocate(struct dma_heap *heap,
                                unsigned long len,
                                unsigned long fd_flags,
                                unsigned long heap_flags)
 {
-       struct heap_helper_buffer *helper_buffer;
+       struct system_heap_buffer *buffer;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+       unsigned long size_remaining = len;
+       unsigned int max_order = orders[0];
        struct dma_buf *dmabuf;
-       int ret = -ENOMEM;
-       pgoff_t pg;
+       struct sg_table *table;
+       struct scatterlist *sg;
+       struct list_head pages;
+       struct page *page, *tmp_page;
+       int i, ret = -ENOMEM;
 
-       helper_buffer = kzalloc(sizeof(*helper_buffer), GFP_KERNEL);
-       if (!helper_buffer)
+       buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+       if (!buffer)
                return -ENOMEM;
 
-       init_heap_helper_buffer(helper_buffer, system_heap_free);
-       helper_buffer->heap = heap;
-       helper_buffer->size = len;
-
-       helper_buffer->pagecount = len / PAGE_SIZE;
-       helper_buffer->pages = kmalloc_array(helper_buffer->pagecount,
-                                            sizeof(*helper_buffer->pages),
-                                            GFP_KERNEL);
-       if (!helper_buffer->pages) {
-               ret = -ENOMEM;
-               goto err0;
-       }
+       INIT_LIST_HEAD(&buffer->attachments);
+       mutex_init(&buffer->lock);
+       buffer->heap = heap;
+       buffer->len = len;
 
-       for (pg = 0; pg < helper_buffer->pagecount; pg++) {
+       INIT_LIST_HEAD(&pages);
+       i = 0;
+       while (size_remaining > 0) {
                /*
                 * Avoid trying to allocate memory if the process
-                * has been killed by by SIGKILL
+                * has been killed by SIGKILL
                 */
                if (fatal_signal_pending(current))
-                       goto err1;
+                       goto free_buffer;
+
+               page = alloc_largest_available(size_remaining, max_order);
+               if (!page)
+                       goto free_buffer;
+
+               list_add_tail(&page->lru, &pages);
+               size_remaining -= page_size(page);
+               max_order = compound_order(page);
+               i++;
+       }
+
+       table = &buffer->sg_table;
+       if (sg_alloc_table(table, i, GFP_KERNEL))
+               goto free_buffer;
 
-               helper_buffer->pages[pg] = alloc_page(GFP_KERNEL | __GFP_ZERO);
-               if (!helper_buffer->pages[pg])
-                       goto err1;
+       sg = table->sgl;
+       list_for_each_entry_safe(page, tmp_page, &pages, lru) {
+               sg_set_page(sg, page, page_size(page), 0);
+               sg = sg_next(sg);
+               list_del(&page->lru);
        }
 
        /* create the dmabuf */
-       dmabuf = heap_helper_export_dmabuf(helper_buffer, fd_flags);
+       exp_info.ops = &system_heap_buf_ops;
+       exp_info.size = buffer->len;
+       exp_info.flags = fd_flags;
+       exp_info.priv = buffer;
+       dmabuf = dma_buf_export(&exp_info);
        if (IS_ERR(dmabuf)) {
                ret = PTR_ERR(dmabuf);
-               goto err1;
+               goto free_pages;
        }
 
-       helper_buffer->dmabuf = dmabuf;
-
        ret = dma_buf_fd(dmabuf, fd_flags);
        if (ret < 0) {
                dma_buf_put(dmabuf);
                /* just return, as put will call release and that will free */
                return ret;
        }
-
        return ret;
 
-err1:
-       while (pg > 0)
-               __free_page(helper_buffer->pages[--pg]);
-       kfree(helper_buffer->pages);
-err0:
-       kfree(helper_buffer);
+free_pages:
+       for_each_sgtable_sg(table, sg, i) {
+               struct page *p = sg_page(sg);
+
+               __free_pages(p, compound_order(p));
+       }
+       sg_free_table(table);
+free_buffer:
+       list_for_each_entry_safe(page, tmp_page, &pages, lru)
+               __free_pages(page, compound_order(page));
+       kfree(buffer);
 
        return ret;
 }
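(A hedged userspace sketch of the CPU-access bracketing that reaches the
begin/end_cpu_access hooks above. buf_fd is assumed to be a dma-buf fd
from a prior heap allocation; cpu_write_window() is a hypothetical
helper.)

  #include <sys/ioctl.h>
  #include <linux/dma-buf.h>

  static int cpu_write_window(int buf_fd)
  {
          struct dma_buf_sync sync = {
                  .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE,
          };

          if (ioctl(buf_fd, DMA_BUF_IOCTL_SYNC, &sync) < 0)
                  return -1;
          /* ... CPU writes through an existing mmap() of buf_fd ... */
          sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
          return ioctl(buf_fd, DMA_BUF_IOCTL_SYNC, &sync);
  }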
@@ -107,7 +428,6 @@ static const struct dma_heap_ops system_heap_ops = {
 static int system_heap_create(void)
 {
        struct dma_heap_export_info exp_info;
-       int ret = 0;
 
        exp_info.name = "system";
        exp_info.ops = &system_heap_ops;
@@ -115,9 +435,9 @@ static int system_heap_create(void)
 
        sys_heap = dma_heap_add(&exp_info);
        if (IS_ERR(sys_heap))
-               ret = PTR_ERR(sys_heap);
+               return PTR_ERR(sys_heap);
 
-       return ret;
+       return 0;
 }
 module_init(system_heap_create);
 MODULE_LICENSE("GPL v2");
index 00af99b..f5fc429 100644 (file)
@@ -58,15 +58,12 @@ typedef unsigned long (psci_fn)(unsigned long, unsigned long,
                                unsigned long, unsigned long);
 static psci_fn *invoke_psci_fn;
 
-enum psci_function {
-       PSCI_FN_CPU_SUSPEND,
-       PSCI_FN_CPU_ON,
-       PSCI_FN_CPU_OFF,
-       PSCI_FN_MIGRATE,
-       PSCI_FN_MAX,
-};
+static struct psci_0_1_function_ids psci_0_1_function_ids;
 
-static u32 psci_function_id[PSCI_FN_MAX];
+struct psci_0_1_function_ids get_psci_0_1_function_ids(void)
+{
+       return psci_0_1_function_ids;
+}
 
 #define PSCI_0_2_POWER_STATE_MASK              \
                                (PSCI_0_2_POWER_STATE_ID_MASK | \
@@ -146,7 +143,12 @@ static int psci_to_linux_errno(int errno)
        return -EINVAL;
 }
 
-static u32 psci_get_version(void)
+static u32 psci_0_1_get_version(void)
+{
+       return PSCI_VERSION(0, 1);
+}
+
+static u32 psci_0_2_get_version(void)
 {
        return invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
 }
@@ -163,46 +165,80 @@ int psci_set_osi_mode(bool enable)
        return psci_to_linux_errno(err);
 }
 
-static int psci_cpu_suspend(u32 state, unsigned long entry_point)
+static int __psci_cpu_suspend(u32 fn, u32 state, unsigned long entry_point)
 {
        int err;
-       u32 fn;
 
-       fn = psci_function_id[PSCI_FN_CPU_SUSPEND];
        err = invoke_psci_fn(fn, state, entry_point, 0);
        return psci_to_linux_errno(err);
 }
 
-static int psci_cpu_off(u32 state)
+static int psci_0_1_cpu_suspend(u32 state, unsigned long entry_point)
+{
+       return __psci_cpu_suspend(psci_0_1_function_ids.cpu_suspend,
+                                 state, entry_point);
+}
+
+static int psci_0_2_cpu_suspend(u32 state, unsigned long entry_point)
+{
+       return __psci_cpu_suspend(PSCI_FN_NATIVE(0_2, CPU_SUSPEND),
+                                 state, entry_point);
+}
+
+static int __psci_cpu_off(u32 fn, u32 state)
 {
        int err;
-       u32 fn;
 
-       fn = psci_function_id[PSCI_FN_CPU_OFF];
        err = invoke_psci_fn(fn, state, 0, 0);
        return psci_to_linux_errno(err);
 }
 
-static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
+static int psci_0_1_cpu_off(u32 state)
+{
+       return __psci_cpu_off(psci_0_1_function_ids.cpu_off, state);
+}
+
+static int psci_0_2_cpu_off(u32 state)
+{
+       return __psci_cpu_off(PSCI_0_2_FN_CPU_OFF, state);
+}
+
+static int __psci_cpu_on(u32 fn, unsigned long cpuid, unsigned long entry_point)
 {
        int err;
-       u32 fn;
 
-       fn = psci_function_id[PSCI_FN_CPU_ON];
        err = invoke_psci_fn(fn, cpuid, entry_point, 0);
        return psci_to_linux_errno(err);
 }
 
-static int psci_migrate(unsigned long cpuid)
+static int psci_0_1_cpu_on(unsigned long cpuid, unsigned long entry_point)
+{
+       return __psci_cpu_on(psci_0_1_function_ids.cpu_on, cpuid, entry_point);
+}
+
+static int psci_0_2_cpu_on(unsigned long cpuid, unsigned long entry_point)
+{
+       return __psci_cpu_on(PSCI_FN_NATIVE(0_2, CPU_ON), cpuid, entry_point);
+}
+
+static int __psci_migrate(u32 fn, unsigned long cpuid)
 {
        int err;
-       u32 fn;
 
-       fn = psci_function_id[PSCI_FN_MIGRATE];
        err = invoke_psci_fn(fn, cpuid, 0, 0);
        return psci_to_linux_errno(err);
 }
 
+static int psci_0_1_migrate(unsigned long cpuid)
+{
+       return __psci_migrate(psci_0_1_function_ids.migrate, cpuid);
+}
+
+static int psci_0_2_migrate(unsigned long cpuid)
+{
+       return __psci_migrate(PSCI_FN_NATIVE(0_2, MIGRATE), cpuid);
+}
+
 static int psci_affinity_info(unsigned long target_affinity,
                unsigned long lowest_affinity_level)
 {
@@ -347,7 +383,7 @@ static void __init psci_init_system_suspend(void)
 
 static void __init psci_init_cpu_suspend(void)
 {
-       int feature = psci_features(psci_function_id[PSCI_FN_CPU_SUSPEND]);
+       int feature = psci_features(PSCI_FN_NATIVE(0_2, CPU_SUSPEND));
 
        if (feature != PSCI_RET_NOT_SUPPORTED)
                psci_cpu_suspend_feature = feature;
@@ -421,24 +457,16 @@ static void __init psci_init_smccc(void)
 static void __init psci_0_2_set_functions(void)
 {
        pr_info("Using standard PSCI v0.2 function IDs\n");
-       psci_ops.get_version = psci_get_version;
-
-       psci_function_id[PSCI_FN_CPU_SUSPEND] =
-                                       PSCI_FN_NATIVE(0_2, CPU_SUSPEND);
-       psci_ops.cpu_suspend = psci_cpu_suspend;
-
-       psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
-       psci_ops.cpu_off = psci_cpu_off;
-
-       psci_function_id[PSCI_FN_CPU_ON] = PSCI_FN_NATIVE(0_2, CPU_ON);
-       psci_ops.cpu_on = psci_cpu_on;
 
-       psci_function_id[PSCI_FN_MIGRATE] = PSCI_FN_NATIVE(0_2, MIGRATE);
-       psci_ops.migrate = psci_migrate;
-
-       psci_ops.affinity_info = psci_affinity_info;
-
-       psci_ops.migrate_info_type = psci_migrate_info_type;
+       psci_ops = (struct psci_operations){
+               .get_version = psci_0_2_get_version,
+               .cpu_suspend = psci_0_2_cpu_suspend,
+               .cpu_off = psci_0_2_cpu_off,
+               .cpu_on = psci_0_2_cpu_on,
+               .migrate = psci_0_2_migrate,
+               .affinity_info = psci_affinity_info,
+               .migrate_info_type = psci_migrate_info_type,
+       };
 
        arm_pm_restart = psci_sys_reset;
 
@@ -450,7 +478,7 @@ static void __init psci_0_2_set_functions(void)
  */
 static int __init psci_probe(void)
 {
-       u32 ver = psci_get_version();
+       u32 ver = psci_0_2_get_version();
 
        pr_info("PSCIv%d.%d detected in firmware.\n",
                        PSCI_VERSION_MAJOR(ver),
@@ -514,24 +542,26 @@ static int __init psci_0_1_init(struct device_node *np)
 
        pr_info("Using PSCI v0.1 Function IDs from DT\n");
 
+       psci_ops.get_version = psci_0_1_get_version;
+
        if (!of_property_read_u32(np, "cpu_suspend", &id)) {
-               psci_function_id[PSCI_FN_CPU_SUSPEND] = id;
-               psci_ops.cpu_suspend = psci_cpu_suspend;
+               psci_0_1_function_ids.cpu_suspend = id;
+               psci_ops.cpu_suspend = psci_0_1_cpu_suspend;
        }
 
        if (!of_property_read_u32(np, "cpu_off", &id)) {
-               psci_function_id[PSCI_FN_CPU_OFF] = id;
-               psci_ops.cpu_off = psci_cpu_off;
+               psci_0_1_function_ids.cpu_off = id;
+               psci_ops.cpu_off = psci_0_1_cpu_off;
        }
 
        if (!of_property_read_u32(np, "cpu_on", &id)) {
-               psci_function_id[PSCI_FN_CPU_ON] = id;
-               psci_ops.cpu_on = psci_cpu_on;
+               psci_0_1_function_ids.cpu_on = id;
+               psci_ops.cpu_on = psci_0_1_cpu_on;
        }
 
        if (!of_property_read_u32(np, "migrate", &id)) {
-               psci_function_id[PSCI_FN_MIGRATE] = id;
-               psci_ops.migrate = psci_migrate;
+               psci_0_1_function_ids.migrate = id;
+               psci_ops.migrate = psci_0_1_migrate;
        }
 
        return 0;
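(A hedged sketch of why the raw v0.1 function IDs are exported: a
consumer that must issue PSCI calls outside psci_ops -- such as a
hypervisor relaying guest requests -- can snapshot the IDs once at init.
Everything here except get_psci_0_1_function_ids() and the struct it
returns is hypothetical.)

  #include <linux/psci.h>

  static struct psci_0_1_function_ids hyp_psci_ids;

  static void hyp_snapshot_psci_ids(void)
  {
          hyp_psci_ids = get_psci_0_1_function_ids();
          /* hyp code can now emit e.g. hyp_psci_ids.cpu_on through its
           * own SMC path without going through psci_ops. */
  }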
index 5d4de5c..c70f46e 100644 (file)
@@ -59,8 +59,9 @@ config DEBUG_GPIO
          that are most common when setting up new platforms or boards.
 
 config GPIO_SYSFS
-       bool "/sys/class/gpio/... (sysfs interface)"
+       bool "/sys/class/gpio/... (sysfs interface)" if EXPERT
        depends on SYSFS
+       select GPIO_CDEV # We need to encourage the new ABI
        help
          Say Y here to add the legacy sysfs interface for GPIOs.
 
@@ -255,6 +256,7 @@ config GPIO_EP93XX
 config GPIO_EXAR
        tristate "Support for GPIO pins on XR17V352/354/358"
        depends on SERIAL_8250_EXAR
+       select REGMAP_MMIO
        help
          Selecting this option will enable handling of GPIO pins present
          on Exar XR17V352/354/358 chips.
@@ -296,6 +298,17 @@ config GPIO_GRGPIO
          Select this to support Aeroflex Gaisler GRGPIO cores from the GRLIB
          VHDL IP core library.
 
+config GPIO_HISI
+       tristate "HiSilicon GPIO controller driver"
+       depends on (ARM64 && ACPI) || COMPILE_TEST
+       select GPIO_GENERIC
+       select GPIOLIB_IRQCHIP
+       help
+         Say Y or M here to build support for the HiSilicon GPIO controller.
+         This GPIO controller supports double-edge interrupts and multi-core
+         concurrent access.
+
 config GPIO_HLWD
        tristate "Nintendo Wii (Hollywood) GPIO"
        depends on OF_GPIO
@@ -737,6 +750,17 @@ config GPIO_AMD_FCH
          Note: This driver doesn't register itself automatically, as it
          needs to be provided with platform specific configuration.
          (See eg. CONFIG_PCENGINES_APU2.)
+
+config GPIO_MSC313
+       bool "MStar MSC313 GPIO support"
+       depends on ARCH_MSTARV7
+       default ARCH_MSTARV7
+       select GPIOLIB_IRQCHIP
+       select IRQ_DOMAIN_HIERARCHY
+       help
+         Say Y here to support the main GPIO block on MStar/SigmaStar
+         ARMv7 based SoCs.
+
 endmenu
 
 menu "Port-mapped I/O GPIO drivers"
@@ -1590,6 +1614,8 @@ config GPIO_VIPERBOARD
 
 endmenu
 
+menu "Virtual GPIO drivers"
+
 config GPIO_AGGREGATOR
        tristate "GPIO Aggregator"
        help
@@ -1613,4 +1639,6 @@ config GPIO_MOCKUP
          tools/testing/selftests/gpio/gpio-mockup.sh. Reference the usage in
          it.
 
+endmenu
+
 endif
index 09dada8..35e3b60 100644 (file)
@@ -63,6 +63,7 @@ obj-$(CONFIG_GPIO_GE_FPGA)            += gpio-ge.o
 obj-$(CONFIG_GPIO_GPIO_MM)             += gpio-gpio-mm.o
 obj-$(CONFIG_GPIO_GRGPIO)              += gpio-grgpio.o
 obj-$(CONFIG_GPIO_GW_PLD)              += gpio-gw-pld.o
+obj-$(CONFIG_GPIO_HISI)                 += gpio-hisi.o
 obj-$(CONFIG_GPIO_HLWD)                        += gpio-hlwd.o
 obj-$(CONFIG_HTC_EGPIO)                        += gpio-htc-egpio.o
 obj-$(CONFIG_GPIO_ICH)                 += gpio-ich.o
@@ -101,6 +102,7 @@ obj-$(CONFIG_GPIO_MOCKUP)           += gpio-mockup.o
 obj-$(CONFIG_GPIO_MOXTET)              += gpio-moxtet.o
 obj-$(CONFIG_GPIO_MPC5200)             += gpio-mpc5200.o
 obj-$(CONFIG_GPIO_MPC8XXX)             += gpio-mpc8xxx.o
+obj-$(CONFIG_GPIO_MSC313)              += gpio-msc313.o
 obj-$(CONFIG_GPIO_MSIC)                        += gpio-msic.o
 obj-$(CONFIG_GPIO_MT7621)              += gpio-mt7621.o
 obj-$(CONFIG_GPIO_MVEBU)               += gpio-mvebu.o
index e560e45..0229fa7 100644 (file)
@@ -129,58 +129,9 @@ GPIOLIB irqchip
 The GPIOLIB irqchip is a helper irqchip for "simple cases" that should
 try to cover any generic kind of irqchip cascaded from a GPIO.
 
-- Convert all the GPIOLIB_IRQCHIP users to pass an irqchip template,
-  parent and flags before calling [devm_]gpiochip_add[_data]().
-  Currently we set up the irqchip after setting up the gpiochip
-  using gpiochip_irqchip_add() and gpiochip_set_[chained|nested]_irqchip().
-  This is too complex, so convert all users over to just set up
-  the irqchip before registering the gpio_chip, typical example:
-
-  /* Typical state container with dynamic irqchip */
-  struct my_gpio {
-      struct gpio_chip gc;
-      struct irq_chip irq;
-  };
-
-  int irq; /* from platform etc */
-  struct my_gpio *g;
-  struct gpio_irq_chip *girq;
-
-  /* Set up the irqchip dynamically */
-  g->irq.name = "my_gpio_irq";
-  g->irq.irq_ack = my_gpio_ack_irq;
-  g->irq.irq_mask = my_gpio_mask_irq;
-  g->irq.irq_unmask = my_gpio_unmask_irq;
-  g->irq.irq_set_type = my_gpio_set_irq_type;
-
-  /* Get a pointer to the gpio_irq_chip */
-  girq = &g->gc.irq;
-  girq->chip = &g->irq;
-  girq->parent_handler = ftgpio_gpio_irq_handler;
-  girq->num_parents = 1;
-  girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
-                               GFP_KERNEL);
-  if (!girq->parents)
-      return -ENOMEM;
-  girq->default_type = IRQ_TYPE_NONE;
-  girq->handler = handle_bad_irq;
-  girq->parents[0] = irq;
-
-  When this is done, we will delete the old APIs for instatiating
-  GPIOLIB_IRQCHIP and simplify the code.
-
 - Look over and identify any remaining easily converted drivers and
   dry-code conversions to gpiolib irqchip for maintainers to test
 
-- Drop gpiochip_set_chained_irqchip() when all the chained irqchips
-  have been converted to the above infrastructure.
-
-- Add more infrastructure to make it possible to also pass a threaded
-  irqchip in struct gpio_irq_chip.
-
-- Drop gpiochip_irqchip_add_nested() when all the chained irqchips
-  have been converted to the above infrastructure.
-
 
 Increase integration with pin control
 
@@ -191,3 +142,39 @@ use of the global GPIO numbers. Once the above is complete, it may
 make sense to simply join the subsystems into one and make pin
 multiplexing, pin configuration, GPIO, etc selectable options in one
 and the same pin control and GPIO subsystem.
+
+
+Debugfs in place of sysfs
+
+The old sysfs code that enables simple uses of GPIOs from the
+command line is still popular despite the existence of the proper
+character device. The reason is that it is simple to use on
+root filesystems where you only have a minimal set of tools such
+as "cat", "echo" etc.
+
+The old sysfs interface still needs to be strongly deprecated and
+removed, as it relies on the global GPIO numberspace, which assumes
+a strict ordering of global GPIO numbers that does not change
+between boots and is independent of probe order.
+
+To solve this and provide an ABI that people can use for hacks
+and development, implement a debugfs interface to manipulate
+GPIO lines that can do everything that sysfs can do today: one
+directory per gpiochip and one file entry per line:
+
+/sys/kernel/debug/gpiochip/gpiochip0
+/sys/kernel/debug/gpiochip/gpiochip0/gpio0
+/sys/kernel/debug/gpiochip/gpiochip0/gpio1
+/sys/kernel/debug/gpiochip/gpiochip0/gpio2
+/sys/kernel/debug/gpiochip/gpiochip0/gpio3
+...
+/sys/kernel/debug/gpiochip/gpiochip1
+/sys/kernel/debug/gpiochip/gpiochip1/gpio0
+/sys/kernel/debug/gpiochip/gpiochip1/gpio1
+...
+
+The exact files and design of the debugfs interface can be
+discussed, but the idea is to provide a low-level access point
+for debugging and hacking and to expose all lines without the
+need for any explicit exporting. It also provides ample ammunition
+to shoot oneself in the foot, because this is debugfs after all.
index 94c3a9b..b132afa 100644 (file)
@@ -132,8 +132,7 @@ static void idi_48_irq_mask(struct irq_data *data)
 
                                outb(idi48gpio->cos_enb, idi48gpio->base + 7);
 
-                               raw_spin_unlock_irqrestore(&idi48gpio->lock,
-                                                          flags);
+                               raw_spin_unlock_irqrestore(&idi48gpio->lock, flags);
                        }
 
                        return;
@@ -166,8 +165,7 @@ static void idi_48_irq_unmask(struct irq_data *data)
 
                                outb(idi48gpio->cos_enb, idi48gpio->base + 7);
 
-                               raw_spin_unlock_irqrestore(&idi48gpio->lock,
-                                                          flags);
+                               raw_spin_unlock_irqrestore(&idi48gpio->lock, flags);
                        }
 
                        return;
index fdcebe5..14e6b3e 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * GPIO driver for AMD 8111 south bridges
  *
  * Hardware driver for Intel i810 Random Number Generator (RNG)
  * Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
  * Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
- *
- * This file is licensed under  the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 #include <linux/ioport.h>
 #include <linux/module.h>
@@ -179,7 +176,6 @@ static int __init amd_gpio_init(void)
        struct pci_dev *pdev = NULL;
        const struct pci_device_id *ent;
 
-
        /* We look for our device - AMD South Bridge
         * I don't know about a system with two such bridges,
         * so we can assume that there is max. one device.
@@ -223,11 +219,10 @@ found:
 
        spin_lock_init(&gp.lock);
 
-       printk(KERN_INFO "AMD-8111 GPIO detected\n");
+       dev_info(&pdev->dev, "AMD-8111 GPIO detected\n");
        err = gpiochip_add_data(&gp.chip, &gp);
        if (err) {
-               printk(KERN_ERR "GPIO registering failed (%d)\n",
-                      err);
+               dev_err(&pdev->dev, "GPIO registering failed (%d)\n", err);
                ioport_unmap(gp.pm);
                goto out;
        }
index d535934..678ddd3 100644 (file)
@@ -123,6 +123,7 @@ static int ath79_gpio_irq_set_type(struct irq_data *data,
        switch (flow_type) {
        case IRQ_TYPE_EDGE_RISING:
                polarity |= mask;
+               fallthrough;
        case IRQ_TYPE_EDGE_FALLING:
        case IRQ_TYPE_EDGE_BOTH:
                break;
index a6f30ad..7920cf2 100644 (file)
@@ -175,13 +175,13 @@ static int bt8xxgpio_probe(struct pci_dev *dev,
 
        err = pci_enable_device(dev);
        if (err) {
-               printk(KERN_ERR "bt8xxgpio: Can't enable device.\n");
+               dev_err(&dev->dev, "can't enable device.\n");
                return err;
        }
        if (!devm_request_mem_region(&dev->dev, pci_resource_start(dev, 0),
                                pci_resource_len(dev, 0),
                                "bt8xxgpio")) {
-               printk(KERN_WARNING "bt8xxgpio: Can't request iomem (0x%llx).\n",
+               dev_warn(&dev->dev, "can't request iomem (0x%llx).\n",
                       (unsigned long long)pci_resource_start(dev, 0));
                err = -EBUSY;
                goto err_disable;
@@ -191,7 +191,7 @@ static int bt8xxgpio_probe(struct pci_dev *dev,
 
        bg->mmio = devm_ioremap(&dev->dev, pci_resource_start(dev, 0), 0x1000);
        if (!bg->mmio) {
-               printk(KERN_ERR "bt8xxgpio: ioremap() failed\n");
+               dev_err(&dev->dev, "ioremap() failed\n");
                err = -EIO;
                goto err_disable;
        }
@@ -207,7 +207,7 @@ static int bt8xxgpio_probe(struct pci_dev *dev,
        bt8xxgpio_gpio_setup(bg);
        err = gpiochip_add_data(&bg->gpio, bg);
        if (err) {
-               printk(KERN_ERR "bt8xxgpio: Failed to register GPIOs\n");
+               dev_err(&dev->dev, "failed to register GPIOs\n");
                goto err_disable;
        }
 
index 53b24e3..6da3a24 100644 (file)
@@ -345,12 +345,8 @@ static int cs5535_gpio_probe(struct platform_device *pdev)
                                mask_orig, mask);
 
        /* finally, register with the generic GPIO API */
-       err = devm_gpiochip_add_data(&pdev->dev, &cs5535_gpio_chip.chip,
-                                    &cs5535_gpio_chip);
-       if (err)
-               return err;
-
-       return 0;
+       return devm_gpiochip_add_data(&pdev->dev, &cs5535_gpio_chip.chip,
+                                     &cs5535_gpio_chip);
 }
 
 static struct platform_driver cs5535_gpio_driver = {
index 4275c18..d3233cc 100644 (file)
@@ -616,10 +616,9 @@ static int dwapb_get_reset(struct dwapb_gpio *gpio)
        int err;
 
        gpio->rst = devm_reset_control_get_optional_shared(gpio->dev, NULL);
-       if (IS_ERR(gpio->rst)) {
-               dev_err(gpio->dev, "Cannot get reset descriptor\n");
-               return PTR_ERR(gpio->rst);
-       }
+       if (IS_ERR(gpio->rst))
+               return dev_err_probe(gpio->dev, PTR_ERR(gpio->rst),
+                                    "Cannot get reset descriptor\n");
 
        err = reset_control_deassert(gpio->rst);
        if (err) {
index b1accfb..d37de78 100644 (file)
@@ -4,14 +4,17 @@
  *
  * Copyright (C) 2015 Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
  */
+
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/gpio/driver.h>
+#include <linux/idr.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
+#include <linux/regmap.h>
 
 #define EXAR_OFFSET_MPIOLVL_LO 0x90
 #define EXAR_OFFSET_MPIOSEL_LO 0x93
@@ -24,60 +27,39 @@ static DEFINE_IDA(ida_index);
 
 struct exar_gpio_chip {
        struct gpio_chip gpio_chip;
-       struct mutex lock;
+       struct regmap *regmap;
        int index;
-       void __iomem *regs;
        char name[20];
        unsigned int first_pin;
 };
 
-static void exar_update(struct gpio_chip *chip, unsigned int reg, int val,
-                       unsigned int offset)
+static unsigned int
+exar_offset_to_sel_addr(struct exar_gpio_chip *exar_gpio, unsigned int offset)
 {
-       struct exar_gpio_chip *exar_gpio = gpiochip_get_data(chip);
-       int temp;
-
-       mutex_lock(&exar_gpio->lock);
-       temp = readb(exar_gpio->regs + reg);
-       temp &= ~BIT(offset);
-       if (val)
-               temp |= BIT(offset);
-       writeb(temp, exar_gpio->regs + reg);
-       mutex_unlock(&exar_gpio->lock);
+       return (offset + exar_gpio->first_pin) / 8 ? EXAR_OFFSET_MPIOSEL_HI
+                                                  : EXAR_OFFSET_MPIOSEL_LO;
 }
 
-static int exar_set_direction(struct gpio_chip *chip, int direction,
-                             unsigned int offset)
+static unsigned int
+exar_offset_to_lvl_addr(struct exar_gpio_chip *exar_gpio, unsigned int offset)
 {
-       struct exar_gpio_chip *exar_gpio = gpiochip_get_data(chip);
-       unsigned int addr = (offset + exar_gpio->first_pin) / 8 ?
-               EXAR_OFFSET_MPIOSEL_HI : EXAR_OFFSET_MPIOSEL_LO;
-       unsigned int bit  = (offset + exar_gpio->first_pin) % 8;
-
-       exar_update(chip, addr, direction, bit);
-       return 0;
+       return (offset + exar_gpio->first_pin) / 8 ? EXAR_OFFSET_MPIOLVL_HI
+                                                  : EXAR_OFFSET_MPIOLVL_LO;
 }
 
-static int exar_get(struct gpio_chip *chip, unsigned int reg)
+static unsigned int
+exar_offset_to_bit(struct exar_gpio_chip *exar_gpio, unsigned int offset)
 {
-       struct exar_gpio_chip *exar_gpio = gpiochip_get_data(chip);
-       int value;
-
-       mutex_lock(&exar_gpio->lock);
-       value = readb(exar_gpio->regs + reg);
-       mutex_unlock(&exar_gpio->lock);
-
-       return value;
+       return (offset + exar_gpio->first_pin) % 8;
 }
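(Illustrative check of the offset mapping in the helpers above, with
assumed values first_pin = 8 and offset = 3: pin 11 falls in the HI
register bank at bit 3. Standalone host-side code, not kernel code.)

  #include <assert.h>

  int main(void)
  {
          unsigned int first_pin = 8, offset = 3;

          assert((offset + first_pin) / 8 == 1);  /* HI bank, pins 8..15 */
          assert((offset + first_pin) % 8 == 3);  /* bit within the bank */
          return 0;
  }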
 
 static int exar_get_direction(struct gpio_chip *chip, unsigned int offset)
 {
        struct exar_gpio_chip *exar_gpio = gpiochip_get_data(chip);
-       unsigned int addr = (offset + exar_gpio->first_pin) / 8 ?
-               EXAR_OFFSET_MPIOSEL_HI : EXAR_OFFSET_MPIOSEL_LO;
-       unsigned int bit  = (offset + exar_gpio->first_pin) % 8;
+       unsigned int addr = exar_offset_to_sel_addr(exar_gpio, offset);
+       unsigned int bit = exar_offset_to_bit(exar_gpio, offset);
 
-       if (exar_get(chip, addr) & BIT(bit))
+       if (regmap_test_bits(exar_gpio->regmap, addr, BIT(bit)))
                return GPIO_LINE_DIRECTION_IN;
 
        return GPIO_LINE_DIRECTION_OUT;
@@ -86,39 +68,66 @@ static int exar_get_direction(struct gpio_chip *chip, unsigned int offset)
 static int exar_get_value(struct gpio_chip *chip, unsigned int offset)
 {
        struct exar_gpio_chip *exar_gpio = gpiochip_get_data(chip);
-       unsigned int addr = (offset + exar_gpio->first_pin) / 8 ?
-               EXAR_OFFSET_MPIOLVL_HI : EXAR_OFFSET_MPIOLVL_LO;
-       unsigned int bit  = (offset + exar_gpio->first_pin) % 8;
+       unsigned int addr = exar_offset_to_lvl_addr(exar_gpio, offset);
+       unsigned int bit = exar_offset_to_bit(exar_gpio, offset);
 
-       return !!(exar_get(chip, addr) & BIT(bit));
+       return !!(regmap_test_bits(exar_gpio->regmap, addr, BIT(bit)));
 }
 
 static void exar_set_value(struct gpio_chip *chip, unsigned int offset,
                           int value)
 {
        struct exar_gpio_chip *exar_gpio = gpiochip_get_data(chip);
-       unsigned int addr = (offset + exar_gpio->first_pin) / 8 ?
-               EXAR_OFFSET_MPIOLVL_HI : EXAR_OFFSET_MPIOLVL_LO;
-       unsigned int bit  = (offset + exar_gpio->first_pin) % 8;
+       unsigned int addr = exar_offset_to_lvl_addr(exar_gpio, offset);
+       unsigned int bit = exar_offset_to_bit(exar_gpio, offset);
 
-       exar_update(chip, addr, value, bit);
+       if (value)
+               regmap_set_bits(exar_gpio->regmap, addr, BIT(bit));
+       else
+               regmap_clear_bits(exar_gpio->regmap, addr, BIT(bit));
 }
 
 static int exar_direction_output(struct gpio_chip *chip, unsigned int offset,
                                 int value)
 {
+       struct exar_gpio_chip *exar_gpio = gpiochip_get_data(chip);
+       unsigned int addr = exar_offset_to_sel_addr(exar_gpio, offset);
+       unsigned int bit = exar_offset_to_bit(exar_gpio, offset);
+
        exar_set_value(chip, offset, value);
-       return exar_set_direction(chip, 0, offset);
+       regmap_clear_bits(exar_gpio->regmap, addr, BIT(bit));
+
+       return 0;
 }
 
 static int exar_direction_input(struct gpio_chip *chip, unsigned int offset)
 {
-       return exar_set_direction(chip, 1, offset);
+       struct exar_gpio_chip *exar_gpio = gpiochip_get_data(chip);
+       unsigned int addr = exar_offset_to_sel_addr(exar_gpio, offset);
+       unsigned int bit = exar_offset_to_bit(exar_gpio, offset);
+
+       regmap_set_bits(exar_gpio->regmap, addr, BIT(bit));
+
+       return 0;
 }
 
+static void exar_devm_ida_free(void *data)
+{
+       struct exar_gpio_chip *exar_gpio = data;
+
+       ida_free(&ida_index, exar_gpio->index);
+}
+
+static const struct regmap_config exar_regmap_config = {
+       .name           = "exar-gpio",
+       .reg_bits       = 16,
+       .val_bits       = 8,
+};
+
 static int gpio_exar_probe(struct platform_device *pdev)
 {
-       struct pci_dev *pcidev = to_pci_dev(pdev->dev.parent);
+       struct device *dev = &pdev->dev;
+       struct pci_dev *pcidev = to_pci_dev(dev->parent);
        struct exar_gpio_chip *exar_gpio;
        u32 first_pin, ngpios;
        void __iomem *p;
@@ -132,30 +141,37 @@ static int gpio_exar_probe(struct platform_device *pdev)
        if (!p)
                return -ENOMEM;
 
-       ret = device_property_read_u32(&pdev->dev, "exar,first-pin",
-                                      &first_pin);
+       ret = device_property_read_u32(dev, "exar,first-pin", &first_pin);
        if (ret)
                return ret;
 
-       ret = device_property_read_u32(&pdev->dev, "ngpios", &ngpios);
+       ret = device_property_read_u32(dev, "ngpios", &ngpios);
        if (ret)
                return ret;
 
-       exar_gpio = devm_kzalloc(&pdev->dev, sizeof(*exar_gpio), GFP_KERNEL);
+       exar_gpio = devm_kzalloc(dev, sizeof(*exar_gpio), GFP_KERNEL);
        if (!exar_gpio)
                return -ENOMEM;
 
-       mutex_init(&exar_gpio->lock);
+       /*
+        * We don't need to check the return values of MMIO regmap operations (unless
+        * the regmap has a clock attached, which is not the case here).
+        */
+       exar_gpio->regmap = devm_regmap_init_mmio(dev, p, &exar_regmap_config);
+       if (IS_ERR(exar_gpio->regmap))
+               return PTR_ERR(exar_gpio->regmap);
+
+       index = ida_alloc(&ida_index, GFP_KERNEL);
+       if (index < 0)
+               return index;
 
-       index = ida_simple_get(&ida_index, 0, 0, GFP_KERNEL);
-       if (index < 0) {
-               ret = index;
-               goto err_mutex_destroy;
-       }
+       ret = devm_add_action_or_reset(dev, exar_devm_ida_free, exar_gpio);
+       if (ret)
+               return ret;
 
        sprintf(exar_gpio->name, "exar_gpio%d", index);
        exar_gpio->gpio_chip.label = exar_gpio->name;
-       exar_gpio->gpio_chip.parent = &pdev->dev;
+       exar_gpio->gpio_chip.parent = dev;
        exar_gpio->gpio_chip.direction_output = exar_direction_output;
        exar_gpio->gpio_chip.direction_input = exar_direction_input;
        exar_gpio->gpio_chip.get_direction = exar_get_direction;
@@ -163,39 +179,20 @@ static int gpio_exar_probe(struct platform_device *pdev)
        exar_gpio->gpio_chip.set = exar_set_value;
        exar_gpio->gpio_chip.base = -1;
        exar_gpio->gpio_chip.ngpio = ngpios;
-       exar_gpio->regs = p;
        exar_gpio->index = index;
        exar_gpio->first_pin = first_pin;
 
-       ret = devm_gpiochip_add_data(&pdev->dev,
-                                    &exar_gpio->gpio_chip, exar_gpio);
+       ret = devm_gpiochip_add_data(dev, &exar_gpio->gpio_chip, exar_gpio);
        if (ret)
-               goto err_destroy;
+               return ret;
 
        platform_set_drvdata(pdev, exar_gpio);
 
        return 0;
-
-err_destroy:
-       ida_simple_remove(&ida_index, index);
-err_mutex_destroy:
-       mutex_destroy(&exar_gpio->lock);
-       return ret;
-}
-
-static int gpio_exar_remove(struct platform_device *pdev)
-{
-       struct exar_gpio_chip *exar_gpio = platform_get_drvdata(pdev);
-
-       ida_simple_remove(&ida_index, exar_gpio->index);
-       mutex_destroy(&exar_gpio->lock);
-
-       return 0;
 }
 
 static struct platform_driver gpio_exar_driver = {
        .probe  = gpio_exar_probe,
-       .remove = gpio_exar_remove,
        .driver = {
                .name = DRIVER_NAME,
        },
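
The conversion above leans on a devres pattern worth seeing in isolation: ida_alloc() paired with devm_add_action_or_reset() releases the index automatically on probe failure or unbind, which is what makes both the error-path labels and the remove() callback unnecessary. A minimal sketch, not part of this patch (the example_* names are hypothetical; needs <linux/device.h> and <linux/idr.h>):

static DEFINE_IDA(example_ida);

struct example_chip {
        int index;
};

static void example_ida_free(void *data)
{
        struct example_chip *chip = data;

        ida_free(&example_ida, chip->index);
}

static int example_alloc_index(struct device *dev, struct example_chip *chip)
{
        chip->index = ida_alloc(&example_ida, GFP_KERNEL);
        if (chip->index < 0)
                return chip->index;

        /*
         * Runs the action immediately if registration fails, otherwise
         * when the device is unbound -- no explicit error path needed.
         */
        return devm_add_action_or_reset(dev, example_ida_free, chip);
}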
diff --git a/drivers/gpio/gpio-hisi.c b/drivers/gpio/gpio-hisi.c
new file mode 100644 (file)
index 0000000..ad3d4da
--- /dev/null
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 HiSilicon Limited. */
+#include <linux/gpio/driver.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+
+#define HISI_GPIO_SWPORT_DR_SET_WX     0x000
+#define HISI_GPIO_SWPORT_DR_CLR_WX     0x004
+#define HISI_GPIO_SWPORT_DDR_SET_WX    0x010
+#define HISI_GPIO_SWPORT_DDR_CLR_WX    0x014
+#define HISI_GPIO_SWPORT_DDR_ST_WX     0x018
+#define HISI_GPIO_INTEN_SET_WX         0x020
+#define HISI_GPIO_INTEN_CLR_WX         0x024
+#define HISI_GPIO_INTMASK_SET_WX       0x030
+#define HISI_GPIO_INTMASK_CLR_WX       0x034
+#define HISI_GPIO_INTTYPE_EDGE_SET_WX  0x040
+#define HISI_GPIO_INTTYPE_EDGE_CLR_WX  0x044
+#define HISI_GPIO_INT_POLARITY_SET_WX  0x050
+#define HISI_GPIO_INT_POLARITY_CLR_WX  0x054
+#define HISI_GPIO_DEBOUNCE_SET_WX      0x060
+#define HISI_GPIO_DEBOUNCE_CLR_WX      0x064
+#define HISI_GPIO_INTSTATUS_WX         0x070
+#define HISI_GPIO_PORTA_EOI_WX         0x078
+#define HISI_GPIO_EXT_PORT_WX          0x080
+#define HISI_GPIO_INTCOMB_MASK_WX      0x0a0
+#define HISI_GPIO_INT_DEDGE_SET                0x0b0
+#define HISI_GPIO_INT_DEDGE_CLR                0x0b4
+#define HISI_GPIO_INT_DEDGE_ST         0x0b8
+
+#define HISI_GPIO_LINE_NUM_MAX 32
+#define HISI_GPIO_DRIVER_NAME  "gpio-hisi"
+
+struct hisi_gpio {
+       struct gpio_chip        chip;
+       struct device           *dev;
+       void __iomem            *reg_base;
+       unsigned int            line_num;
+       struct irq_chip         irq_chip;
+       int                     irq;
+};
+
+static inline u32 hisi_gpio_read_reg(struct gpio_chip *chip,
+                                    unsigned int off)
+{
+       struct hisi_gpio *hisi_gpio =
+                       container_of(chip, struct hisi_gpio, chip);
+       void __iomem *reg = hisi_gpio->reg_base + off;
+
+       return readl(reg);
+}
+
+static inline void hisi_gpio_write_reg(struct gpio_chip *chip,
+                                      unsigned int off, u32 val)
+{
+       struct hisi_gpio *hisi_gpio =
+                       container_of(chip, struct hisi_gpio, chip);
+       void __iomem *reg = hisi_gpio->reg_base + off;
+
+       writel(val, reg);
+}
+
+static void hisi_gpio_set_debounce(struct gpio_chip *chip, unsigned int off,
+                                  u32 debounce)
+{
+       if (debounce)
+               hisi_gpio_write_reg(chip, HISI_GPIO_DEBOUNCE_SET_WX, BIT(off));
+       else
+               hisi_gpio_write_reg(chip, HISI_GPIO_DEBOUNCE_CLR_WX, BIT(off));
+}
+
+static int hisi_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
+                               unsigned long config)
+{
+       u32 config_para = pinconf_to_config_param(config);
+       u32 config_arg;
+
+       switch (config_para) {
+       case PIN_CONFIG_INPUT_DEBOUNCE:
+               config_arg = pinconf_to_config_argument(config);
+               hisi_gpio_set_debounce(chip, offset, config_arg);
+               break;
+       default:
+               return -ENOTSUPP;
+       }
+
+       return 0;
+}
+
+static void hisi_gpio_set_ack(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+       hisi_gpio_write_reg(chip, HISI_GPIO_PORTA_EOI_WX, BIT(irqd_to_hwirq(d)));
+}
+
+static void hisi_gpio_irq_set_mask(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+       hisi_gpio_write_reg(chip, HISI_GPIO_INTMASK_SET_WX, BIT(irqd_to_hwirq(d)));
+}
+
+static void hisi_gpio_irq_clr_mask(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+       hisi_gpio_write_reg(chip, HISI_GPIO_INTMASK_CLR_WX, BIT(irqd_to_hwirq(d)));
+}
+
+static int hisi_gpio_irq_set_type(struct irq_data *d, u32 type)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+       unsigned int mask = BIT(irqd_to_hwirq(d));
+
+       switch (type) {
+       case IRQ_TYPE_EDGE_BOTH:
+               hisi_gpio_write_reg(chip, HISI_GPIO_INT_DEDGE_SET, mask);
+               break;
+       case IRQ_TYPE_EDGE_RISING:
+               hisi_gpio_write_reg(chip, HISI_GPIO_INTTYPE_EDGE_SET_WX, mask);
+               hisi_gpio_write_reg(chip, HISI_GPIO_INT_POLARITY_SET_WX, mask);
+               break;
+       case IRQ_TYPE_EDGE_FALLING:
+               hisi_gpio_write_reg(chip, HISI_GPIO_INTTYPE_EDGE_SET_WX, mask);
+               hisi_gpio_write_reg(chip, HISI_GPIO_INT_POLARITY_CLR_WX, mask);
+               break;
+       case IRQ_TYPE_LEVEL_HIGH:
+               hisi_gpio_write_reg(chip, HISI_GPIO_INTTYPE_EDGE_CLR_WX, mask);
+               hisi_gpio_write_reg(chip, HISI_GPIO_INT_POLARITY_SET_WX, mask);
+               break;
+       case IRQ_TYPE_LEVEL_LOW:
+               hisi_gpio_write_reg(chip, HISI_GPIO_INTTYPE_EDGE_CLR_WX, mask);
+               hisi_gpio_write_reg(chip, HISI_GPIO_INT_POLARITY_CLR_WX, mask);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /*
+        * The dual-edge interrupt and the other interrupt types do not
+        * take effect in the hardware at the same time: the dual-edge
+        * interrupt registers have higher priority, so any dual-edge
+        * configuration must be cleared before configuring another
+        * kind of interrupt.
+        */
+       if (type != IRQ_TYPE_EDGE_BOTH) {
+               unsigned int both = hisi_gpio_read_reg(chip, HISI_GPIO_INT_DEDGE_ST);
+
+               if (both & mask)
+                       hisi_gpio_write_reg(chip, HISI_GPIO_INT_DEDGE_CLR, mask);
+       }
+
+       if (type & IRQ_TYPE_LEVEL_MASK)
+               irq_set_handler_locked(d, handle_level_irq);
+       else if (type & IRQ_TYPE_EDGE_BOTH)
+               irq_set_handler_locked(d, handle_edge_irq);
+
+       return 0;
+}
+
+static void hisi_gpio_irq_enable(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+       hisi_gpio_irq_clr_mask(d);
+       hisi_gpio_write_reg(chip, HISI_GPIO_INTEN_SET_WX, BIT(irqd_to_hwirq(d)));
+}
+
+static void hisi_gpio_irq_disable(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+       hisi_gpio_irq_set_mask(d);
+       hisi_gpio_write_reg(chip, HISI_GPIO_INTEN_CLR_WX, BIT(irqd_to_hwirq(d)));
+}
+
+static void hisi_gpio_irq_handler(struct irq_desc *desc)
+{
+       struct hisi_gpio *hisi_gpio = irq_desc_get_handler_data(desc);
+       unsigned long irq_msk = hisi_gpio_read_reg(&hisi_gpio->chip,
+                                                  HISI_GPIO_INTSTATUS_WX);
+       struct irq_chip *irq_c = irq_desc_get_chip(desc);
+       int hwirq;
+
+       chained_irq_enter(irq_c, desc);
+       for_each_set_bit(hwirq, &irq_msk, HISI_GPIO_LINE_NUM_MAX)
+               generic_handle_irq(irq_find_mapping(hisi_gpio->chip.irq.domain,
+                                                   hwirq));
+       chained_irq_exit(irq_c, desc);
+}
+
+static void hisi_gpio_init_irq(struct hisi_gpio *hisi_gpio)
+{
+       struct gpio_chip *chip = &hisi_gpio->chip;
+       struct gpio_irq_chip *girq_chip = &chip->irq;
+
+       /* Set hooks for irq_chip */
+       hisi_gpio->irq_chip.irq_ack = hisi_gpio_set_ack;
+       hisi_gpio->irq_chip.irq_mask = hisi_gpio_irq_set_mask;
+       hisi_gpio->irq_chip.irq_unmask = hisi_gpio_irq_clr_mask;
+       hisi_gpio->irq_chip.irq_set_type = hisi_gpio_irq_set_type;
+       hisi_gpio->irq_chip.irq_enable = hisi_gpio_irq_enable;
+       hisi_gpio->irq_chip.irq_disable = hisi_gpio_irq_disable;
+
+       girq_chip->chip = &hisi_gpio->irq_chip;
+       girq_chip->default_type = IRQ_TYPE_NONE;
+       girq_chip->num_parents = 1;
+       girq_chip->parents = &hisi_gpio->irq;
+       girq_chip->parent_handler = hisi_gpio_irq_handler;
+       girq_chip->parent_handler_data = hisi_gpio;
+
+       /* Clear Mask of GPIO controller combine IRQ */
+       hisi_gpio_write_reg(chip, HISI_GPIO_INTCOMB_MASK_WX, 1);
+}
+
+static const struct acpi_device_id hisi_gpio_acpi_match[] = {
+       {"HISI0184", 0},
+       {}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_gpio_acpi_match);
+
+static void hisi_gpio_get_pdata(struct device *dev,
+                               struct hisi_gpio *hisi_gpio)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct fwnode_handle *fwnode;
+       int idx = 0;
+
+       device_for_each_child_node(dev, fwnode) {
+               /* The loop runs only once, so no array is needed to save line_num */
+               if (fwnode_property_read_u32(fwnode, "ngpios",
+                                            &hisi_gpio->line_num)) {
+                       dev_err(dev,
+                               "failed to get number of lines for port%d and use default value instead\n",
+                               idx);
+                       hisi_gpio->line_num = HISI_GPIO_LINE_NUM_MAX;
+               }
+
+               if (WARN_ON(hisi_gpio->line_num > HISI_GPIO_LINE_NUM_MAX))
+                       hisi_gpio->line_num = HISI_GPIO_LINE_NUM_MAX;
+
+               hisi_gpio->irq = platform_get_irq(pdev, idx);
+
+               dev_info(dev,
+                        "get hisi_gpio[%d] with %d lines\n", idx,
+                        hisi_gpio->line_num);
+
+               idx++;
+       }
+}
+
+static int hisi_gpio_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct hisi_gpio *hisi_gpio;
+       int port_num;
+       int ret;
+
+       /*
+        * One GPIO controller owns exactly one port currently;
+        * if the ACPI table reports more, return an error.
+        */
+       port_num = device_get_child_node_count(dev);
+       if (WARN_ON(port_num != 1))
+               return -ENODEV;
+
+       hisi_gpio = devm_kzalloc(dev, sizeof(*hisi_gpio), GFP_KERNEL);
+       if (!hisi_gpio)
+               return -ENOMEM;
+
+       hisi_gpio->reg_base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(hisi_gpio->reg_base))
+               return PTR_ERR(hisi_gpio->reg_base);
+
+       hisi_gpio_get_pdata(dev, hisi_gpio);
+
+       hisi_gpio->dev = dev;
+
+       ret = bgpio_init(&hisi_gpio->chip, hisi_gpio->dev, 0x4,
+                        hisi_gpio->reg_base + HISI_GPIO_EXT_PORT_WX,
+                        hisi_gpio->reg_base + HISI_GPIO_SWPORT_DR_SET_WX,
+                        hisi_gpio->reg_base + HISI_GPIO_SWPORT_DR_CLR_WX,
+                        hisi_gpio->reg_base + HISI_GPIO_SWPORT_DDR_SET_WX,
+                        hisi_gpio->reg_base + HISI_GPIO_SWPORT_DDR_CLR_WX,
+                        BGPIOF_NO_SET_ON_INPUT);
+       if (ret) {
+               dev_err(dev, "failed to init, ret = %d\n", ret);
+               return ret;
+       }
+
+       hisi_gpio->chip.set_config = hisi_gpio_set_config;
+       hisi_gpio->chip.ngpio = hisi_gpio->line_num;
+       hisi_gpio->chip.bgpio_dir_unreadable = 1;
+       hisi_gpio->chip.base = -1;
+
+       if (hisi_gpio->irq > 0)
+               hisi_gpio_init_irq(hisi_gpio);
+
+       ret = devm_gpiochip_add_data(dev, &hisi_gpio->chip, hisi_gpio);
+       if (ret) {
+               dev_err(dev, "failed to register gpiochip, ret = %d\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+static struct platform_driver hisi_gpio_driver = {
+       .driver         = {
+               .name   = HISI_GPIO_DRIVER_NAME,
+               .acpi_match_table = hisi_gpio_acpi_match,
+       },
+       .probe          = hisi_gpio_probe,
+};
+
+module_platform_driver(hisi_gpio_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Luo Jiaxing <luojiaxing@huawei.com>");
+MODULE_DESCRIPTION("HiSilicon GPIO controller driver");
+MODULE_ALIAS("platform:" HISI_GPIO_DRIVER_NAME);
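
One detail of the block above worth calling out: every control register comes as a SET/CLR pair (HISI_GPIO_INTMASK_SET_WX/HISI_GPIO_INTMASK_CLR_WX and so on), so updating a single line is one register write rather than a read-modify-write, which is presumably why the masking paths take no lock. A minimal sketch of the idiom, not part of this patch (the example_* helper is hypothetical; needs <linux/io.h> and <linux/bits.h>):

static void example_update_line(void __iomem *base, unsigned int line,
                                bool on, u32 set_off, u32 clr_off)
{
        /*
         * Writing BIT(line) to the SET or CLR register affects only
         * that line, so concurrent updates of other lines are safe.
         */
        writel(BIT(line), base + (on ? set_off : clr_off));
}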
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 67ed4f2..28b757d 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/irq.h>
 #include <linux/irq_sim.h>
 #include <linux/irqdomain.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
@@ -460,9 +461,16 @@ static int gpio_mockup_probe(struct platform_device *pdev)
        return 0;
 }
 
+static const struct of_device_id gpio_mockup_of_match[] = {
+       { .compatible = "gpio-mockup", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, gpio_mockup_of_match);
+
 static struct platform_driver gpio_mockup_driver = {
        .driver = {
                .name = "gpio-mockup",
+               .of_match_table = gpio_mockup_of_match,
        },
        .probe = gpio_mockup_probe,
 };
@@ -556,8 +564,7 @@ static int __init gpio_mockup_init(void)
 {
        int i, num_chips, err;
 
-       if ((gpio_mockup_num_ranges < 2) ||
-           (gpio_mockup_num_ranges % 2) ||
+       if ((gpio_mockup_num_ranges % 2) ||
            (gpio_mockup_num_ranges > GPIO_MOCKUP_MAX_RANGES))
                return -EINVAL;
 
diff --git a/drivers/gpio/gpio-msc313.c b/drivers/gpio/gpio-msc313.c
new file mode 100644 (file)
index 0000000..da31a5f
--- /dev/null
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2020 Daniel Palmer<daniel@thingy.jp> */
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/gpio/driver.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <dt-bindings/gpio/msc313-gpio.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+#define DRIVER_NAME "gpio-msc313"
+
+#define MSC313_GPIO_IN  BIT(0)
+#define MSC313_GPIO_OUT BIT(4)
+#define MSC313_GPIO_OEN BIT(5)
+
+/*
+ * These bits need to be saved to correctly restore the
+ * gpio state when resuming from suspend to memory.
+ */
+#define MSC313_GPIO_BITSTOSAVE (MSC313_GPIO_OUT | MSC313_GPIO_OEN)
+
+/* pad names for fuart, same for all SoCs so far */
+#define MSC313_PINNAME_FUART_RX                "fuart_rx"
+#define MSC313_PINNAME_FUART_TX                "fuart_tx"
+#define MSC313_PINNAME_FUART_CTS       "fuart_cts"
+#define MSC313_PINNAME_FUART_RTS       "fuart_rts"
+
+/* pad names for sr, mercury5 is different */
+#define MSC313_PINNAME_SR_IO2          "sr_io2"
+#define MSC313_PINNAME_SR_IO3          "sr_io3"
+#define MSC313_PINNAME_SR_IO4          "sr_io4"
+#define MSC313_PINNAME_SR_IO5          "sr_io5"
+#define MSC313_PINNAME_SR_IO6          "sr_io6"
+#define MSC313_PINNAME_SR_IO7          "sr_io7"
+#define MSC313_PINNAME_SR_IO8          "sr_io8"
+#define MSC313_PINNAME_SR_IO9          "sr_io9"
+#define MSC313_PINNAME_SR_IO10         "sr_io10"
+#define MSC313_PINNAME_SR_IO11         "sr_io11"
+#define MSC313_PINNAME_SR_IO12         "sr_io12"
+#define MSC313_PINNAME_SR_IO13         "sr_io13"
+#define MSC313_PINNAME_SR_IO14         "sr_io14"
+#define MSC313_PINNAME_SR_IO15         "sr_io15"
+#define MSC313_PINNAME_SR_IO16         "sr_io16"
+#define MSC313_PINNAME_SR_IO17         "sr_io17"
+
+/* pad names for sd, same for all SoCs so far */
+#define MSC313_PINNAME_SD_CLK          "sd_clk"
+#define MSC313_PINNAME_SD_CMD          "sd_cmd"
+#define MSC313_PINNAME_SD_D0           "sd_d0"
+#define MSC313_PINNAME_SD_D1           "sd_d1"
+#define MSC313_PINNAME_SD_D2           "sd_d2"
+#define MSC313_PINNAME_SD_D3           "sd_d3"
+
+/* pad names for i2c1, same for all SoCs so far */
+#define MSC313_PINNAME_I2C1_SCL                "i2c1_scl"
+#define MSC313_PINNAME_I2C1_SCA                "i2c1_sda"
+
+/* pad names for spi0, same for all SoCs so far */
+#define MSC313_PINNAME_SPI0_CZ         "spi0_cz"
+#define MSC313_PINNAME_SPI0_CK         "spi0_ck"
+#define MSC313_PINNAME_SPI0_DI         "spi0_di"
+#define MSC313_PINNAME_SPI0_DO         "spi0_do"
+
+#define FUART_NAMES                    \
+       MSC313_PINNAME_FUART_RX,        \
+       MSC313_PINNAME_FUART_TX,        \
+       MSC313_PINNAME_FUART_CTS,       \
+       MSC313_PINNAME_FUART_RTS
+
+#define OFF_FUART_RX   0x50
+#define OFF_FUART_TX   0x54
+#define OFF_FUART_CTS  0x58
+#define OFF_FUART_RTS  0x5c
+
+#define FUART_OFFSETS  \
+       OFF_FUART_RX,   \
+       OFF_FUART_TX,   \
+       OFF_FUART_CTS,  \
+       OFF_FUART_RTS
+
+#define SR_NAMES               \
+       MSC313_PINNAME_SR_IO2,  \
+       MSC313_PINNAME_SR_IO3,  \
+       MSC313_PINNAME_SR_IO4,  \
+       MSC313_PINNAME_SR_IO5,  \
+       MSC313_PINNAME_SR_IO6,  \
+       MSC313_PINNAME_SR_IO7,  \
+       MSC313_PINNAME_SR_IO8,  \
+       MSC313_PINNAME_SR_IO9,  \
+       MSC313_PINNAME_SR_IO10, \
+       MSC313_PINNAME_SR_IO11, \
+       MSC313_PINNAME_SR_IO12, \
+       MSC313_PINNAME_SR_IO13, \
+       MSC313_PINNAME_SR_IO14, \
+       MSC313_PINNAME_SR_IO15, \
+       MSC313_PINNAME_SR_IO16, \
+       MSC313_PINNAME_SR_IO17
+
+#define OFF_SR_IO2     0x88
+#define OFF_SR_IO3     0x8c
+#define OFF_SR_IO4     0x90
+#define OFF_SR_IO5     0x94
+#define OFF_SR_IO6     0x98
+#define OFF_SR_IO7     0x9c
+#define OFF_SR_IO8     0xa0
+#define OFF_SR_IO9     0xa4
+#define OFF_SR_IO10    0xa8
+#define OFF_SR_IO11    0xac
+#define OFF_SR_IO12    0xb0
+#define OFF_SR_IO13    0xb4
+#define OFF_SR_IO14    0xb8
+#define OFF_SR_IO15    0xbc
+#define OFF_SR_IO16    0xc0
+#define OFF_SR_IO17    0xc4
+
+#define SR_OFFSETS     \
+       OFF_SR_IO2,     \
+       OFF_SR_IO3,     \
+       OFF_SR_IO4,     \
+       OFF_SR_IO5,     \
+       OFF_SR_IO6,     \
+       OFF_SR_IO7,     \
+       OFF_SR_IO8,     \
+       OFF_SR_IO9,     \
+       OFF_SR_IO10,    \
+       OFF_SR_IO11,    \
+       OFF_SR_IO12,    \
+       OFF_SR_IO13,    \
+       OFF_SR_IO14,    \
+       OFF_SR_IO15,    \
+       OFF_SR_IO16,    \
+       OFF_SR_IO17
+
+#define SD_NAMES               \
+       MSC313_PINNAME_SD_CLK,  \
+       MSC313_PINNAME_SD_CMD,  \
+       MSC313_PINNAME_SD_D0,   \
+       MSC313_PINNAME_SD_D1,   \
+       MSC313_PINNAME_SD_D2,   \
+       MSC313_PINNAME_SD_D3
+
+#define OFF_SD_CLK     0x140
+#define OFF_SD_CMD     0x144
+#define OFF_SD_D0      0x148
+#define OFF_SD_D1      0x14c
+#define OFF_SD_D2      0x150
+#define OFF_SD_D3      0x154
+
+#define SD_OFFSETS     \
+       OFF_SD_CLK,     \
+       OFF_SD_CMD,     \
+       OFF_SD_D0,      \
+       OFF_SD_D1,      \
+       OFF_SD_D2,      \
+       OFF_SD_D3
+
+#define I2C1_NAMES                     \
+       MSC313_PINNAME_I2C1_SCL,        \
+       MSC313_PINNAME_I2C1_SCA
+
+#define OFF_I2C1_SCL   0x188
+#define OFF_I2C1_SCA   0x18c
+
+#define I2C1_OFFSETS   \
+       OFF_I2C1_SCL,   \
+       OFF_I2C1_SCA
+
+#define SPI0_NAMES             \
+       MSC313_PINNAME_SPI0_CZ, \
+       MSC313_PINNAME_SPI0_CK, \
+       MSC313_PINNAME_SPI0_DI, \
+       MSC313_PINNAME_SPI0_DO
+
+#define OFF_SPI0_CZ    0x1c0
+#define OFF_SPI0_CK    0x1c4
+#define OFF_SPI0_DI    0x1c8
+#define OFF_SPI0_DO    0x1cc
+
+#define SPI0_OFFSETS   \
+       OFF_SPI0_CZ,    \
+       OFF_SPI0_CK,    \
+       OFF_SPI0_DI,    \
+       OFF_SPI0_DO
+
+struct msc313_gpio_data {
+       const char * const *names;
+       const unsigned int *offsets;
+       const unsigned int num;
+};
+
+#define MSC313_GPIO_CHIPDATA(_chip) \
+static const struct msc313_gpio_data _chip##_data = { \
+       .names = _chip##_names, \
+       .offsets = _chip##_offsets, \
+       .num = ARRAY_SIZE(_chip##_offsets), \
+}
+
+#ifdef CONFIG_MACH_INFINITY
+static const char * const msc313_names[] = {
+       FUART_NAMES,
+       SR_NAMES,
+       SD_NAMES,
+       I2C1_NAMES,
+       SPI0_NAMES,
+};
+
+static const unsigned int msc313_offsets[] = {
+       FUART_OFFSETS,
+       SR_OFFSETS,
+       SD_OFFSETS,
+       I2C1_OFFSETS,
+       SPI0_OFFSETS,
+};
+
+MSC313_GPIO_CHIPDATA(msc313);
+#endif
+
+struct msc313_gpio {
+       void __iomem *base;
+       const struct msc313_gpio_data *gpio_data;
+       u8 *saved;
+};
+
+static void msc313_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
+{
+       struct msc313_gpio *gpio = gpiochip_get_data(chip);
+       u8 gpioreg = readb_relaxed(gpio->base + gpio->gpio_data->offsets[offset]);
+
+       if (value)
+               gpioreg |= MSC313_GPIO_OUT;
+       else
+               gpioreg &= ~MSC313_GPIO_OUT;
+
+       writeb_relaxed(gpioreg, gpio->base + gpio->gpio_data->offsets[offset]);
+}
+
+static int msc313_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+       struct msc313_gpio *gpio = gpiochip_get_data(chip);
+
+       return readb_relaxed(gpio->base + gpio->gpio_data->offsets[offset]) & MSC313_GPIO_IN;
+}
+
+static int msc313_gpio_direction_input(struct gpio_chip *chip, unsigned int offset)
+{
+       struct msc313_gpio *gpio = gpiochip_get_data(chip);
+       u8 gpioreg = readb_relaxed(gpio->base + gpio->gpio_data->offsets[offset]);
+
+       gpioreg |= MSC313_GPIO_OEN;
+       writeb_relaxed(gpioreg, gpio->base + gpio->gpio_data->offsets[offset]);
+
+       return 0;
+}
+
+static int msc313_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value)
+{
+       struct msc313_gpio *gpio = gpiochip_get_data(chip);
+       u8 gpioreg = readb_relaxed(gpio->base + gpio->gpio_data->offsets[offset]);
+
+       gpioreg &= ~MSC313_GPIO_OEN;
+       if (value)
+               gpioreg |= MSC313_GPIO_OUT;
+       else
+               gpioreg &= ~MSC313_GPIO_OUT;
+       writeb_relaxed(gpioreg, gpio->base + gpio->gpio_data->offsets[offset]);
+
+       return 0;
+}
+
+/*
+ * The interrupt handling happens in the parent interrupt controller,
+ * so we don't do anything here.
+ */
+static struct irq_chip msc313_gpio_irqchip = {
+       .name = "GPIO",
+       .irq_eoi = irq_chip_eoi_parent,
+       .irq_mask = irq_chip_mask_parent,
+       .irq_unmask = irq_chip_unmask_parent,
+       .irq_set_type = irq_chip_set_type_parent,
+       .irq_set_affinity = irq_chip_set_affinity_parent,
+};
+
+/*
+ * The parent interrupt controller needs the GIC interrupt type set to GIC_SPI,
+ * so we need to provide a full fwspec. This is essentially
+ * gpiochip_populate_parent_fwspec_twocell with GIC_SPI put into the first cell.
+ */
+static void *msc313_gpio_populate_parent_fwspec(struct gpio_chip *gc,
+                                            unsigned int parent_hwirq,
+                                            unsigned int parent_type)
+{
+       struct irq_fwspec *fwspec;
+
+       fwspec = kmalloc(sizeof(*fwspec), GFP_KERNEL);
+       if (!fwspec)
+               return NULL;
+
+       fwspec->fwnode = gc->irq.parent_domain->fwnode;
+       fwspec->param_count = 3;
+       fwspec->param[0] = GIC_SPI;
+       fwspec->param[1] = parent_hwirq;
+       fwspec->param[2] = parent_type;
+
+       return fwspec;
+}
+
+static int msc313e_gpio_child_to_parent_hwirq(struct gpio_chip *chip,
+                                            unsigned int child,
+                                            unsigned int child_type,
+                                            unsigned int *parent,
+                                            unsigned int *parent_type)
+{
+       struct msc313_gpio *priv = gpiochip_get_data(chip);
+       unsigned int offset = priv->gpio_data->offsets[child];
+
+       /*
+        * Only the spi0 pins have interrupts on the parent
+        * controller on all of the known chips, and so far
+        * they are all mapped to the same place.
+        */
+       if (offset >= OFF_SPI0_CZ && offset <= OFF_SPI0_DO) {
+               *parent_type = child_type;
+               *parent = ((offset - OFF_SPI0_CZ) >> 2) + 28;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int msc313_gpio_probe(struct platform_device *pdev)
+{
+       const struct msc313_gpio_data *match_data;
+       struct msc313_gpio *gpio;
+       struct gpio_chip *gpiochip;
+       struct gpio_irq_chip *gpioirqchip;
+       struct irq_domain *parent_domain;
+       struct device_node *parent_node;
+       struct device *dev = &pdev->dev;
+       int ret;
+
+       match_data = of_device_get_match_data(dev);
+       if (!match_data)
+               return -EINVAL;
+
+       parent_node = of_irq_find_parent(dev->of_node);
+       if (!parent_node)
+               return -ENODEV;
+
+       parent_domain = irq_find_host(parent_node);
+       if (!parent_domain)
+               return -ENODEV;
+
+       gpio = devm_kzalloc(dev, sizeof(*gpio), GFP_KERNEL);
+       if (!gpio)
+               return -ENOMEM;
+
+       gpio->gpio_data = match_data;
+
+       gpio->saved = devm_kcalloc(dev, gpio->gpio_data->num, sizeof(*gpio->saved), GFP_KERNEL);
+       if (!gpio->saved)
+               return -ENOMEM;
+
+       gpio->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(gpio->base))
+               return PTR_ERR(gpio->base);
+
+       platform_set_drvdata(pdev, gpio);
+
+       gpiochip = devm_kzalloc(dev, sizeof(*gpiochip), GFP_KERNEL);
+       if (!gpiochip)
+               return -ENOMEM;
+
+       gpiochip->label = DRIVER_NAME;
+       gpiochip->parent = dev;
+       gpiochip->request = gpiochip_generic_request;
+       gpiochip->free = gpiochip_generic_free;
+       gpiochip->direction_input = msc313_gpio_direction_input;
+       gpiochip->direction_output = msc313_gpio_direction_output;
+       gpiochip->get = msc313_gpio_get;
+       gpiochip->set = msc313_gpio_set;
+       gpiochip->base = -1;
+       gpiochip->ngpio = gpio->gpio_data->num;
+       gpiochip->names = gpio->gpio_data->names;
+
+       gpioirqchip = &gpiochip->irq;
+       gpioirqchip->chip = &msc313_gpio_irqchip;
+       gpioirqchip->fwnode = of_node_to_fwnode(dev->of_node);
+       gpioirqchip->parent_domain = parent_domain;
+       gpioirqchip->child_to_parent_hwirq = msc313e_gpio_child_to_parent_hwirq;
+       gpioirqchip->populate_parent_alloc_arg = msc313_gpio_populate_parent_fwspec;
+       gpioirqchip->handler = handle_bad_irq;
+       gpioirqchip->default_type = IRQ_TYPE_NONE;
+
+       ret = devm_gpiochip_add_data(dev, gpiochip, gpio);
+       return ret;
+}
+
+static int msc313_gpio_remove(struct platform_device *pdev)
+{
+       return 0;
+}
+
+static const struct of_device_id msc313_gpio_of_match[] = {
+#ifdef CONFIG_MACH_INFINITY
+       {
+               .compatible = "mstar,msc313-gpio",
+               .data = &msc313_data,
+       },
+#endif
+       { }
+};
+
+/*
+ * The GPIO controller loses the state of its registers when the
+ * SoC goes into suspend-to-memory mode, so we need to save some
+ * of the register bits before suspending and restore them when resuming.
+ */
+static int __maybe_unused msc313_gpio_suspend(struct device *dev)
+{
+       struct msc313_gpio *gpio = dev_get_drvdata(dev);
+       int i;
+
+       for (i = 0; i < gpio->gpio_data->num; i++)
+               gpio->saved[i] = readb_relaxed(gpio->base + gpio->gpio_data->offsets[i]) & MSC313_GPIO_BITSTOSAVE;
+
+       return 0;
+}
+
+static int __maybe_unused msc313_gpio_resume(struct device *dev)
+{
+       struct msc313_gpio *gpio = dev_get_drvdata(dev);
+       int i;
+
+       for (i = 0; i < gpio->gpio_data->num; i++)
+               writeb_relaxed(gpio->saved[i], gpio->base + gpio->gpio_data->offsets[i]);
+
+       return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(msc313_gpio_ops, msc313_gpio_suspend, msc313_gpio_resume);
+
+static struct platform_driver msc313_gpio_driver = {
+       .driver = {
+               .name = DRIVER_NAME,
+               .of_match_table = msc313_gpio_of_match,
+               .pm = &msc313_gpio_ops,
+       },
+       .probe = msc313_gpio_probe,
+       .remove = msc313_gpio_remove,
+};
+
+builtin_platform_driver(msc313_gpio_driver);
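
To make the child_to_parent_hwirq arithmetic above concrete, the mapping it produces for the four spi0 pads (offsets taken from the defines in this file) is:

/*
 *   spi0_cz  offset 0x1c0 -> ((0x1c0 - 0x1c0) >> 2) + 28 = parent hwirq 28
 *   spi0_ck  offset 0x1c4 -> ((0x1c4 - 0x1c0) >> 2) + 28 = parent hwirq 29
 *   spi0_di  offset 0x1c8 -> ((0x1c8 - 0x1c0) >> 2) + 28 = parent hwirq 30
 *   spi0_do  offset 0x1cc -> ((0x1cc - 0x1c0) >> 2) + 28 = parent hwirq 31
 */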
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 2f24559..672681a 100644 (file)
@@ -78,8 +78,7 @@
 
 /*
  * The Armada XP has per-CPU registers for interrupt cause, interrupt
- * mask and interrupt level mask. Those are relative to the
- * percpu_membase.
+ * mask and interrupt level mask. Those are in the percpu_regs range.
  */
 #define GPIO_EDGE_CAUSE_ARMADAXP_OFF(cpu) ((cpu) * 0x4)
 #define GPIO_EDGE_MASK_ARMADAXP_OFF(cpu)  (0x10 + (cpu) * 0x4)
@@ -93,7 +92,7 @@
 #define MVEBU_MAX_GPIO_PER_BANK                32
 
 struct mvebu_pwm {
-       void __iomem            *membase;
+       struct regmap           *regs;
        unsigned long            clk_rate;
        struct gpio_desc        *gpiod;
        struct pwm_chip          chip;
@@ -279,17 +278,17 @@ mvebu_gpio_write_level_mask(struct mvebu_gpio_chip *mvchip, u32 val)
 }
 
 /*
- * Functions returning addresses of individual registers for a given
+ * Functions returning offsets of individual registers for a given
  * PWM controller.
  */
-static void __iomem *mvebu_pwmreg_blink_on_duration(struct mvebu_pwm *mvpwm)
+static unsigned int mvebu_pwmreg_blink_on_duration(struct mvebu_pwm *mvpwm)
 {
-       return mvpwm->membase + PWM_BLINK_ON_DURATION_OFF;
+       return PWM_BLINK_ON_DURATION_OFF;
 }
 
-static void __iomem *mvebu_pwmreg_blink_off_duration(struct mvebu_pwm *mvpwm)
+static unsigned int mvebu_pwmreg_blink_off_duration(struct mvebu_pwm *mvpwm)
 {
-       return mvpwm->membase + PWM_BLINK_OFF_DURATION_OFF;
+       return PWM_BLINK_OFF_DURATION_OFF;
 }
 
 /*
@@ -600,6 +599,13 @@ static void mvebu_gpio_irq_handler(struct irq_desc *desc)
        chained_irq_exit(chip, desc);
 }
 
+static const struct regmap_config mvebu_gpio_regmap_config = {
+       .reg_bits = 32,
+       .reg_stride = 4,
+       .val_bits = 32,
+       .fast_io = true,
+};
+
 /*
  * Functions implementing the pwm_chip methods
  */
@@ -660,9 +666,8 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
 
        spin_lock_irqsave(&mvpwm->lock, flags);
 
-       val = (unsigned long long)
-               readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm));
-       val *= NSEC_PER_SEC;
+       regmap_read(mvpwm->regs, mvebu_pwmreg_blink_on_duration(mvpwm), &u);
+       val = (unsigned long long) u * NSEC_PER_SEC;
        do_div(val, mvpwm->clk_rate);
        if (val > UINT_MAX)
                state->duty_cycle = UINT_MAX;
@@ -671,9 +676,8 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
        else
                state->duty_cycle = 1;
 
-       val = (unsigned long long)
-               readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm));
-       val *= NSEC_PER_SEC;
+       regmap_read(mvpwm->regs, mvebu_pwmreg_blink_off_duration(mvpwm), &u);
+       val = (unsigned long long) u * NSEC_PER_SEC;
        do_div(val, mvpwm->clk_rate);
        if (val < state->duty_cycle) {
                state->period = 1;
@@ -726,8 +730,8 @@ static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
        spin_lock_irqsave(&mvpwm->lock, flags);
 
-       writel_relaxed(on, mvebu_pwmreg_blink_on_duration(mvpwm));
-       writel_relaxed(off, mvebu_pwmreg_blink_off_duration(mvpwm));
+       regmap_write(mvpwm->regs, mvebu_pwmreg_blink_on_duration(mvpwm), on);
+       regmap_write(mvpwm->regs, mvebu_pwmreg_blink_off_duration(mvpwm), off);
        if (state->enabled)
                mvebu_gpio_blink(&mvchip->chip, pwm->hwpwm, 1);
        else
@@ -752,10 +756,10 @@ static void __maybe_unused mvebu_pwm_suspend(struct mvebu_gpio_chip *mvchip)
 
        regmap_read(mvchip->regs, GPIO_BLINK_CNT_SELECT_OFF + mvchip->offset,
                    &mvpwm->blink_select);
-       mvpwm->blink_on_duration =
-               readl_relaxed(mvebu_pwmreg_blink_on_duration(mvpwm));
-       mvpwm->blink_off_duration =
-               readl_relaxed(mvebu_pwmreg_blink_off_duration(mvpwm));
+       regmap_read(mvpwm->regs, mvebu_pwmreg_blink_on_duration(mvpwm),
+                   &mvpwm->blink_on_duration);
+       regmap_read(mvpwm->regs, mvebu_pwmreg_blink_off_duration(mvpwm),
+                   &mvpwm->blink_off_duration);
 }
 
 static void __maybe_unused mvebu_pwm_resume(struct mvebu_gpio_chip *mvchip)
@@ -764,10 +768,10 @@ static void __maybe_unused mvebu_pwm_resume(struct mvebu_gpio_chip *mvchip)
 
        regmap_write(mvchip->regs, GPIO_BLINK_CNT_SELECT_OFF + mvchip->offset,
                     mvpwm->blink_select);
-       writel_relaxed(mvpwm->blink_on_duration,
-                      mvebu_pwmreg_blink_on_duration(mvpwm));
-       writel_relaxed(mvpwm->blink_off_duration,
-                      mvebu_pwmreg_blink_off_duration(mvpwm));
+       regmap_write(mvpwm->regs, mvebu_pwmreg_blink_on_duration(mvpwm),
+                    mvpwm->blink_on_duration);
+       regmap_write(mvpwm->regs, mvebu_pwmreg_blink_off_duration(mvpwm),
+                    mvpwm->blink_off_duration);
 }
 
 static int mvebu_pwm_probe(struct platform_device *pdev,
@@ -776,6 +780,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 {
        struct device *dev = &pdev->dev;
        struct mvebu_pwm *mvpwm;
+       void __iomem *base;
        u32 set;
 
        if (!of_device_is_compatible(mvchip->chip.of_node,
@@ -813,9 +818,14 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
        mvchip->mvpwm = mvpwm;
        mvpwm->mvchip = mvchip;
 
-       mvpwm->membase = devm_platform_ioremap_resource_byname(pdev, "pwm");
-       if (IS_ERR(mvpwm->membase))
-               return PTR_ERR(mvpwm->membase);
+       base = devm_platform_ioremap_resource_byname(pdev, "pwm");
+       if (IS_ERR(base))
+               return PTR_ERR(base);
+
+       mvpwm->regs = devm_regmap_init_mmio(&pdev->dev, base,
+                                           &mvebu_gpio_regmap_config);
+       if (IS_ERR(mvpwm->regs))
+               return PTR_ERR(mvpwm->regs);
 
        mvpwm->clk_rate = clk_get_rate(mvchip->clk);
        if (!mvpwm->clk_rate) {
@@ -1022,13 +1032,6 @@ static int mvebu_gpio_resume(struct platform_device *pdev)
        return 0;
 }
 
-static const struct regmap_config mvebu_gpio_regmap_config = {
-       .reg_bits = 32,
-       .reg_stride = 4,
-       .val_bits = 32,
-       .fast_io = true,
-};
-
 static int mvebu_gpio_probe_raw(struct platform_device *pdev,
                                struct mvebu_gpio_chip *mvchip)
 {
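
The readl/writel-to-regmap conversion applied above follows a common shape: describe the register window once in a regmap_config, create the regmap over the already-mapped base, and let fast_io select a spinlock (rather than a mutex) internally so the accessors stay usable under the driver's existing spin_lock_irqsave() sections. A minimal sketch, not part of this patch (example_* names are hypothetical; needs <linux/regmap.h>):

static const struct regmap_config example_regmap_config = {
        .reg_bits       = 32,
        .reg_stride     = 4,
        .val_bits       = 32,
        .fast_io        = true, /* spinlock instead of mutex internally */
};

static int example_init_regmap(struct device *dev, void __iomem *base,
                               struct regmap **regs)
{
        *regs = devm_regmap_init_mmio(dev, base, &example_regmap_config);

        return PTR_ERR_OR_ZERO(*regs);
}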
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index 643f4c5..157106e 100644 (file)
 #include <linux/of_device.h>
 #include <linux/bug.h>
 
-enum mxc_gpio_hwtype {
-       IMX1_GPIO,      /* runs on i.mx1 */
-       IMX21_GPIO,     /* runs on i.mx21 and i.mx27 */
-       IMX31_GPIO,     /* runs on i.mx31 */
-       IMX35_GPIO,     /* runs on all other i.mx */
-};
-
 /* device type dependent stuff */
 struct mxc_gpio_hwdata {
        unsigned dr_reg;
@@ -68,6 +61,7 @@ struct mxc_gpio_port {
        u32 both_edges;
        struct mxc_gpio_reg_saved gpio_saved_reg;
        bool power_off;
+       const struct mxc_gpio_hwdata *hwdata;
 };
 
 static struct mxc_gpio_hwdata imx1_imx21_gpio_hwdata = {
@@ -115,48 +109,27 @@ static struct mxc_gpio_hwdata imx35_gpio_hwdata = {
        .fall_edge      = 0x03,
 };
 
-static enum mxc_gpio_hwtype mxc_gpio_hwtype;
-static struct mxc_gpio_hwdata *mxc_gpio_hwdata;
-
-#define GPIO_DR                        (mxc_gpio_hwdata->dr_reg)
-#define GPIO_GDIR              (mxc_gpio_hwdata->gdir_reg)
-#define GPIO_PSR               (mxc_gpio_hwdata->psr_reg)
-#define GPIO_ICR1              (mxc_gpio_hwdata->icr1_reg)
-#define GPIO_ICR2              (mxc_gpio_hwdata->icr2_reg)
-#define GPIO_IMR               (mxc_gpio_hwdata->imr_reg)
-#define GPIO_ISR               (mxc_gpio_hwdata->isr_reg)
-#define GPIO_EDGE_SEL          (mxc_gpio_hwdata->edge_sel_reg)
-
-#define GPIO_INT_LOW_LEV       (mxc_gpio_hwdata->low_level)
-#define GPIO_INT_HIGH_LEV      (mxc_gpio_hwdata->high_level)
-#define GPIO_INT_RISE_EDGE     (mxc_gpio_hwdata->rise_edge)
-#define GPIO_INT_FALL_EDGE     (mxc_gpio_hwdata->fall_edge)
+#define GPIO_DR                        (port->hwdata->dr_reg)
+#define GPIO_GDIR              (port->hwdata->gdir_reg)
+#define GPIO_PSR               (port->hwdata->psr_reg)
+#define GPIO_ICR1              (port->hwdata->icr1_reg)
+#define GPIO_ICR2              (port->hwdata->icr2_reg)
+#define GPIO_IMR               (port->hwdata->imr_reg)
+#define GPIO_ISR               (port->hwdata->isr_reg)
+#define GPIO_EDGE_SEL          (port->hwdata->edge_sel_reg)
+
+#define GPIO_INT_LOW_LEV       (port->hwdata->low_level)
+#define GPIO_INT_HIGH_LEV      (port->hwdata->high_level)
+#define GPIO_INT_RISE_EDGE     (port->hwdata->rise_edge)
+#define GPIO_INT_FALL_EDGE     (port->hwdata->fall_edge)
 #define GPIO_INT_BOTH_EDGES    0x4
 
-static const struct platform_device_id mxc_gpio_devtype[] = {
-       {
-               .name = "imx1-gpio",
-               .driver_data = IMX1_GPIO,
-       }, {
-               .name = "imx21-gpio",
-               .driver_data = IMX21_GPIO,
-       }, {
-               .name = "imx31-gpio",
-               .driver_data = IMX31_GPIO,
-       }, {
-               .name = "imx35-gpio",
-               .driver_data = IMX35_GPIO,
-       }, {
-               /* sentinel */
-       }
-};
-
 static const struct of_device_id mxc_gpio_dt_ids[] = {
-       { .compatible = "fsl,imx1-gpio", .data = &mxc_gpio_devtype[IMX1_GPIO], },
-       { .compatible = "fsl,imx21-gpio", .data = &mxc_gpio_devtype[IMX21_GPIO], },
-       { .compatible = "fsl,imx31-gpio", .data = &mxc_gpio_devtype[IMX31_GPIO], },
-       { .compatible = "fsl,imx35-gpio", .data = &mxc_gpio_devtype[IMX35_GPIO], },
-       { .compatible = "fsl,imx7d-gpio", .data = &mxc_gpio_devtype[IMX35_GPIO], },
+       { .compatible = "fsl,imx1-gpio", .data =  &imx1_imx21_gpio_hwdata },
+       { .compatible = "fsl,imx21-gpio", .data = &imx1_imx21_gpio_hwdata },
+       { .compatible = "fsl,imx31-gpio", .data = &imx31_gpio_hwdata },
+       { .compatible = "fsl,imx35-gpio", .data = &imx35_gpio_hwdata },
+       { .compatible = "fsl,imx7d-gpio", .data = &imx35_gpio_hwdata },
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, mxc_gpio_dt_ids);
@@ -372,36 +345,6 @@ static int mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base)
        return rv;
 }
 
-static void mxc_gpio_get_hw(struct platform_device *pdev)
-{
-       const struct of_device_id *of_id =
-                       of_match_device(mxc_gpio_dt_ids, &pdev->dev);
-       enum mxc_gpio_hwtype hwtype;
-
-       if (of_id)
-               pdev->id_entry = of_id->data;
-       hwtype = pdev->id_entry->driver_data;
-
-       if (mxc_gpio_hwtype) {
-               /*
-                * The driver works with a reasonable presupposition,
-                * that is all gpio ports must be the same type when
-                * running on one soc.
-                */
-               BUG_ON(mxc_gpio_hwtype != hwtype);
-               return;
-       }
-
-       if (hwtype == IMX35_GPIO)
-               mxc_gpio_hwdata = &imx35_gpio_hwdata;
-       else if (hwtype == IMX31_GPIO)
-               mxc_gpio_hwdata = &imx31_gpio_hwdata;
-       else
-               mxc_gpio_hwdata = &imx1_imx21_gpio_hwdata;
-
-       mxc_gpio_hwtype = hwtype;
-}
-
 static int mxc_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
 {
        struct mxc_gpio_port *port = gpiochip_get_data(gc);
@@ -417,14 +360,14 @@ static int mxc_gpio_probe(struct platform_device *pdev)
        int irq_base;
        int err;
 
-       mxc_gpio_get_hw(pdev);
-
        port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL);
        if (!port)
                return -ENOMEM;
 
        port->dev = &pdev->dev;
 
+       port->hwdata = device_get_match_data(&pdev->dev);
+
        port->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(port->base))
                return PTR_ERR(port->base);
@@ -461,7 +404,7 @@ static int mxc_gpio_probe(struct platform_device *pdev)
        writel(0, port->base + GPIO_IMR);
        writel(~0, port->base + GPIO_ISR);
 
-       if (mxc_gpio_hwtype == IMX21_GPIO) {
+       if (of_device_is_compatible(np, "fsl,imx21-gpio")) {
                /*
                 * Setup one handler for all GPIO interrupts. Actually setting
                 * the handler is needed only once, but doing it for every port
@@ -596,7 +539,6 @@ static struct platform_driver mxc_gpio_driver = {
                .suppress_bind_attrs = true,
        },
        .probe          = mxc_gpio_probe,
-       .id_table       = mxc_gpio_devtype,
 };
 
 static int __init gpio_mxc_init(void)
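
The mxc rework above replaces a driver-wide hwtype global (and the platform id_table that fed it) with per-port data fetched through device_get_match_data(), the usual shape of which is sketched below. Not part of this patch (example_* names are hypothetical; needs <linux/mod_devicetable.h>, <linux/platform_device.h> and <linux/property.h>):

struct example_hwdata {
        unsigned int dr_reg;
};

static const struct example_hwdata example_a_hwdata = { .dr_reg = 0x00 };
static const struct example_hwdata example_b_hwdata = { .dr_reg = 0x1c };

static const struct of_device_id example_dt_ids[] = {
        { .compatible = "vendor,example-a", .data = &example_a_hwdata },
        { .compatible = "vendor,example-b", .data = &example_b_hwdata },
        { /* sentinel */ }
};

static int example_probe(struct platform_device *pdev)
{
        const struct example_hwdata *hw = device_get_match_data(&pdev->dev);

        if (!hw)
                return -ENODEV;

        /* hw->dr_reg now selects the variant-specific register layout. */
        return 0;
}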
diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c
index c4a314c..dfc0c1e 100644 (file)
@@ -254,19 +254,6 @@ static int mxs_gpio_get_direction(struct gpio_chip *gc, unsigned offset)
        return GPIO_LINE_DIRECTION_IN;
 }
 
-static const struct platform_device_id mxs_gpio_ids[] = {
-       {
-               .name = "imx23-gpio",
-               .driver_data = IMX23_GPIO,
-       }, {
-               .name = "imx28-gpio",
-               .driver_data = IMX28_GPIO,
-       }, {
-               /* sentinel */
-       }
-};
-MODULE_DEVICE_TABLE(platform, mxs_gpio_ids);
-
 static const struct of_device_id mxs_gpio_dt_ids[] = {
        { .compatible = "fsl,imx23-gpio", .data = (void *) IMX23_GPIO, },
        { .compatible = "fsl,imx28-gpio", .data = (void *) IMX28_GPIO, },
@@ -370,7 +357,6 @@ static struct platform_driver mxs_gpio_driver = {
                .suppress_bind_attrs = true,
        },
        .probe          = mxs_gpio_probe,
-       .id_table       = mxs_gpio_ids,
 };
 
 static int __init mxs_gpio_init(void)
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index f7ceb2b..41952bb 100644 (file)
@@ -1049,11 +1049,8 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
        irq->first = irq_base;
 
        ret = gpiochip_add_data(&bank->chip, bank);
-       if (ret) {
-               dev_err(bank->chip.parent,
-                       "Could not register gpio chip %d\n", ret);
-               return ret;
-       }
+       if (ret)
+               return dev_err_probe(bank->chip.parent, ret, "Could not register gpio chip\n");
 
        ret = devm_request_irq(bank->chip.parent, bank->irq,
                               omap_gpio_irq_handler,
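
dev_err_probe(), adopted above, folds the log and the return into one statement and demotes -EPROBE_DEFER to a debug-level message so deferred probes do not spam the log. A minimal sketch of the idiom, not part of this patch (example_* names are hypothetical; needs <linux/device.h>):

static int example_register(struct device *dev)
{
        int ret = -ENXIO;       /* stand-in for a failing registration */

        if (ret)
                return dev_err_probe(dev, ret, "Could not register gpio chip\n");

        return 0;
}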
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index 3ef19ce..0b572db 100644 (file)
@@ -32,6 +32,11 @@ struct gpio_rcar_bank_info {
        u32 intmsk;
 };
 
+struct gpio_rcar_info {
+       bool has_outdtsel;
+       bool has_both_edge_trigger;
+};
+
 struct gpio_rcar_priv {
        void __iomem *base;
        spinlock_t lock;
@@ -40,24 +45,23 @@ struct gpio_rcar_priv {
        struct irq_chip irq_chip;
        unsigned int irq_parent;
        atomic_t wakeup_path;
-       bool has_outdtsel;
-       bool has_both_edge_trigger;
+       struct gpio_rcar_info info;
        struct gpio_rcar_bank_info bank_info;
 };
 
-#define IOINTSEL 0x00  /* General IO/Interrupt Switching Register */
-#define INOUTSEL 0x04  /* General Input/Output Switching Register */
-#define OUTDT 0x08     /* General Output Register */
-#define INDT 0x0c      /* General Input Register */
-#define INTDT 0x10     /* Interrupt Display Register */
-#define INTCLR 0x14    /* Interrupt Clear Register */
-#define INTMSK 0x18    /* Interrupt Mask Register */
-#define MSKCLR 0x1c    /* Interrupt Mask Clear Register */
-#define POSNEG 0x20    /* Positive/Negative Logic Select Register */
-#define EDGLEVEL 0x24  /* Edge/level Select Register */
-#define FILONOFF 0x28  /* Chattering Prevention On/Off Register */
-#define OUTDTSEL 0x40  /* Output Data Select Register */
-#define BOTHEDGE 0x4c  /* One Edge/Both Edge Select Register */
+#define IOINTSEL       0x00    /* General IO/Interrupt Switching Register */
+#define INOUTSEL       0x04    /* General Input/Output Switching Register */
+#define OUTDT          0x08    /* General Output Register */
+#define INDT           0x0c    /* General Input Register */
+#define INTDT          0x10    /* Interrupt Display Register */
+#define INTCLR         0x14    /* Interrupt Clear Register */
+#define INTMSK         0x18    /* Interrupt Mask Register */
+#define MSKCLR         0x1c    /* Interrupt Mask Clear Register */
+#define POSNEG         0x20    /* Positive/Negative Logic Select Register */
+#define EDGLEVEL       0x24    /* Edge/level Select Register */
+#define FILONOFF       0x28    /* Chattering Prevention On/Off Register */
+#define OUTDTSEL       0x40    /* Output Data Select Register */
+#define BOTHEDGE       0x4c    /* One Edge/Both Edge Select Register */
 
 #define RCAR_MAX_GPIO_PER_BANK         32
 
@@ -123,7 +127,7 @@ static void gpio_rcar_config_interrupt_input_mode(struct gpio_rcar_priv *p,
        gpio_rcar_modify_bit(p, EDGLEVEL, hwirq, !level_trigger);
 
        /* Select one edge or both edges in BOTHEDGE */
-       if (p->has_both_edge_trigger)
+       if (p->info.has_both_edge_trigger)
                gpio_rcar_modify_bit(p, BOTHEDGE, hwirq, both);
 
        /* Select "Interrupt Input Mode" in IOINTSEL */
@@ -162,7 +166,7 @@ static int gpio_rcar_irq_set_type(struct irq_data *d, unsigned int type)
                                                      false);
                break;
        case IRQ_TYPE_EDGE_BOTH:
-               if (!p->has_both_edge_trigger)
+               if (!p->info.has_both_edge_trigger)
                        return -EINVAL;
                gpio_rcar_config_interrupt_input_mode(p, hwirq, true, false,
                                                      true);
@@ -238,7 +242,7 @@ static void gpio_rcar_config_general_input_output_mode(struct gpio_chip *chip,
        gpio_rcar_modify_bit(p, INOUTSEL, gpio, output);
 
        /* Select General Output Register to output data in OUTDTSEL */
-       if (p->has_outdtsel && output)
+       if (p->info.has_outdtsel && output)
                gpio_rcar_modify_bit(p, OUTDTSEL, gpio, false);
 
        spin_unlock_irqrestore(&p->lock, flags);
@@ -295,14 +299,44 @@ static int gpio_rcar_direction_input(struct gpio_chip *chip, unsigned offset)
 
 static int gpio_rcar_get(struct gpio_chip *chip, unsigned offset)
 {
+       struct gpio_rcar_priv *p = gpiochip_get_data(chip);
        u32 bit = BIT(offset);
 
        /* testing on r8a7790 shows that INDT does not show correct pin state
         * when configured as output, so use OUTDT in case of output pins */
-       if (gpio_rcar_read(gpiochip_get_data(chip), INOUTSEL) & bit)
-               return !!(gpio_rcar_read(gpiochip_get_data(chip), OUTDT) & bit);
+       if (gpio_rcar_read(p, INOUTSEL) & bit)
+               return !!(gpio_rcar_read(p, OUTDT) & bit);
        else
-               return !!(gpio_rcar_read(gpiochip_get_data(chip), INDT) & bit);
+               return !!(gpio_rcar_read(p, INDT) & bit);
+}
+
+static int gpio_rcar_get_multiple(struct gpio_chip *chip, unsigned long *mask,
+                                 unsigned long *bits)
+{
+       struct gpio_rcar_priv *p = gpiochip_get_data(chip);
+       u32 bankmask, outputs, m, val = 0;
+       unsigned long flags;
+
+       bankmask = mask[0] & GENMASK(chip->ngpio - 1, 0);
+       if (chip->valid_mask)
+               bankmask &= chip->valid_mask[0];
+
+       if (!bankmask)
+               return 0;
+
+       spin_lock_irqsave(&p->lock, flags);
+       outputs = gpio_rcar_read(p, INOUTSEL);
+       m = outputs & bankmask;
+       if (m)
+               val |= gpio_rcar_read(p, OUTDT) & m;
+
+       m = ~outputs & bankmask;
+       if (m)
+               val |= gpio_rcar_read(p, INDT) & m;
+       spin_unlock_irqrestore(&p->lock, flags);
+
+       bits[0] = val;
+       return 0;
 }
 
 static void gpio_rcar_set(struct gpio_chip *chip, unsigned offset, int value)
@@ -346,11 +380,6 @@ static int gpio_rcar_direction_output(struct gpio_chip *chip, unsigned offset,
        return 0;
 }
 
-struct gpio_rcar_info {
-       bool has_outdtsel;
-       bool has_both_edge_trigger;
-};
-
 static const struct gpio_rcar_info gpio_rcar_info_gen1 = {
        .has_outdtsel = false,
        .has_both_edge_trigger = false,
@@ -417,8 +446,7 @@ static int gpio_rcar_parse_dt(struct gpio_rcar_priv *p, unsigned int *npins)
        int ret;
 
        info = of_device_get_match_data(p->dev);
-       p->has_outdtsel = info->has_outdtsel;
-       p->has_both_edge_trigger = info->has_both_edge_trigger;
+       p->info = *info;
 
        ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args);
        *npins = ret == 0 ? args.args[2] : RCAR_MAX_GPIO_PER_BANK;
@@ -479,6 +507,7 @@ static int gpio_rcar_probe(struct platform_device *pdev)
        gpio_chip->get_direction = gpio_rcar_get_direction;
        gpio_chip->direction_input = gpio_rcar_direction_input;
        gpio_chip->get = gpio_rcar_get;
+       gpio_chip->get_multiple = gpio_rcar_get_multiple;
        gpio_chip->direction_output = gpio_rcar_direction_output;
        gpio_chip->set = gpio_rcar_set;
        gpio_chip->set_multiple = gpio_rcar_set_multiple;
@@ -552,7 +581,7 @@ static int gpio_rcar_suspend(struct device *dev)
        p->bank_info.intmsk = gpio_rcar_read(p, INTMSK);
        p->bank_info.posneg = gpio_rcar_read(p, POSNEG);
        p->bank_info.edglevel = gpio_rcar_read(p, EDGLEVEL);
-       if (p->has_both_edge_trigger)
+       if (p->info.has_both_edge_trigger)
                p->bank_info.bothedge = gpio_rcar_read(p, BOTHEDGE);
 
        if (atomic_read(&p->wakeup_path))
diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
index d5eb9ca..403f9e8 100644 (file)
@@ -29,7 +29,6 @@
 #define SIFIVE_GPIO_OUTPUT_XOR 0x40
 
 #define SIFIVE_GPIO_MAX                32
-#define SIFIVE_GPIO_IRQ_OFFSET 7
 
 struct sifive_gpio {
        void __iomem            *base;
@@ -37,7 +36,7 @@ struct sifive_gpio {
        struct regmap           *regs;
        unsigned long           irq_state;
        unsigned int            trigger[SIFIVE_GPIO_MAX];
-       unsigned int            irq_parent[SIFIVE_GPIO_MAX];
+       unsigned int            irq_number[SIFIVE_GPIO_MAX];
 };
 
 static void sifive_gpio_set_ie(struct sifive_gpio *chip, unsigned int offset)
@@ -128,6 +127,16 @@ static void sifive_gpio_irq_eoi(struct irq_data *d)
        irq_chip_eoi_parent(d);
 }
 
+static int sifive_gpio_irq_set_affinity(struct irq_data *data,
+                                       const struct cpumask *dest,
+                                       bool force)
+{
+       if (data->parent_data)
+               return irq_chip_set_affinity_parent(data, dest, force);
+
+       return -EINVAL;
+}
+
 static struct irq_chip sifive_gpio_irqchip = {
        .name           = "sifive-gpio",
        .irq_set_type   = sifive_gpio_irq_set_type,
@@ -136,6 +145,7 @@ static struct irq_chip sifive_gpio_irqchip = {
        .irq_enable     = sifive_gpio_irq_enable,
        .irq_disable    = sifive_gpio_irq_disable,
        .irq_eoi        = sifive_gpio_irq_eoi,
+       .irq_set_affinity = sifive_gpio_irq_set_affinity,
 };
 
 static int sifive_gpio_child_to_parent_hwirq(struct gpio_chip *gc,
@@ -144,8 +154,12 @@ static int sifive_gpio_child_to_parent_hwirq(struct gpio_chip *gc,
                                             unsigned int *parent,
                                             unsigned int *parent_type)
 {
+       struct sifive_gpio *chip = gpiochip_get_data(gc);
+       struct irq_data *d = irq_get_irq_data(chip->irq_number[child]);
+
        *parent_type = IRQ_TYPE_NONE;
-       *parent = child + SIFIVE_GPIO_IRQ_OFFSET;
+       *parent = irqd_to_hwirq(d);
+
        return 0;
 }
 
@@ -165,7 +179,7 @@ static int sifive_gpio_probe(struct platform_device *pdev)
        struct irq_domain *parent;
        struct gpio_irq_chip *girq;
        struct sifive_gpio *chip;
-       int ret, ngpio;
+       int ret, ngpio, i;
 
        chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
        if (!chip)
@@ -200,6 +214,9 @@ static int sifive_gpio_probe(struct platform_device *pdev)
                return -ENODEV;
        }
 
+       for (i = 0; i < ngpio; i++)
+               chip->irq_number[i] = platform_get_irq(pdev, i);
+
        ret = bgpio_init(&chip->gc, dev, 4,
                         chip->base + SIFIVE_GPIO_INPUT_VAL,
                         chip->base + SIFIVE_GPIO_OUTPUT_VAL,
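
The sifive change above drops the hardcoded SIFIVE_GPIO_IRQ_OFFSET and instead recovers each parent hwirq from the Linux IRQ that platform_get_irq() already mapped. The lookup reduces to the following, a sketch not part of this patch (the example_* helper is hypothetical; needs <linux/irq.h>):

static irq_hw_number_t example_parent_hwirq(unsigned int linux_irq)
{
        struct irq_data *d = irq_get_irq_data(linux_irq);

        return d ? irqd_to_hwirq(d) : 0;
}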
diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c
index b0155d6..b94ef81 100644 (file)
@@ -474,15 +474,6 @@ static int stmpe_gpio_probe(struct platform_device *pdev)
        stmpe_gpio->chip.parent = &pdev->dev;
        stmpe_gpio->chip.of_node = np;
        stmpe_gpio->chip.base = -1;
-       /*
-        * REVISIT: this makes sure the valid mask gets allocated and
-        * filled in when adding the gpio_chip, but the rest of the
-        * gpio_irqchip is still filled in using the old method
-        * in gpiochip_irqchip_add_nested() so clean this up once we
-        * get the gpio_irqchip to initialize while adding the
-        * gpio_chip also for threaded irqchips.
-        */
-       stmpe_gpio->chip.irq.init_valid_mask = stmpe_init_irq_valid_mask;
 
        if (IS_ENABLED(CONFIG_DEBUG_FS))
                 stmpe_gpio->chip.dbg_show = stmpe_dbg_show;
@@ -520,6 +511,7 @@ static int stmpe_gpio_probe(struct platform_device *pdev)
                girq->default_type = IRQ_TYPE_NONE;
                girq->handler = handle_simple_irq;
                girq->threaded = true;
+               girq->init_valid_mask = stmpe_init_irq_valid_mask;
        }
 
        ret = gpiochip_add_data(&stmpe_gpio->chip, stmpe_gpio);
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 8656815..e19ebff 100644 (file)
@@ -61,8 +61,16 @@ struct tegra_gpio_info;
 struct tegra_gpio_bank {
        unsigned int bank;
        unsigned int irq;
-       spinlock_t lvl_lock[4];
-       spinlock_t dbc_lock[4]; /* Lock for updating debounce count register */
+
+       /*
+        * IRQ-core code uses raw locking, and thus, nested locking also
+        * should be raw in order not to trip spinlock debug warnings.
+        */
+       raw_spinlock_t lvl_lock[4];
+
+       /* Lock for updating debounce count register */
+       spinlock_t dbc_lock[4];
+
 #ifdef CONFIG_PM_SLEEP
        u32 cnf[4];
        u32 out[4];
@@ -334,14 +342,14 @@ static int tegra_gpio_irq_set_type(struct irq_data *d, unsigned int type)
                return -EINVAL;
        }
 
-       spin_lock_irqsave(&bank->lvl_lock[port], flags);
+       raw_spin_lock_irqsave(&bank->lvl_lock[port], flags);
 
        val = tegra_gpio_readl(tgi, GPIO_INT_LVL(tgi, gpio));
        val &= ~(GPIO_INT_LVL_MASK << GPIO_BIT(gpio));
        val |= lvl_type << GPIO_BIT(gpio);
        tegra_gpio_writel(tgi, val, GPIO_INT_LVL(tgi, gpio));
 
-       spin_unlock_irqrestore(&bank->lvl_lock[port], flags);
+       raw_spin_unlock_irqrestore(&bank->lvl_lock[port], flags);
 
        tegra_gpio_mask_write(tgi, GPIO_MSK_OE(tgi, gpio), gpio, 0);
        tegra_gpio_enable(tgi, gpio);
@@ -560,6 +568,9 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = {
        SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_gpio_suspend, tegra_gpio_resume)
 };
 
+static struct lock_class_key gpio_lock_class;
+static struct lock_class_key gpio_request_class;
+
 static int tegra_gpio_probe(struct platform_device *pdev)
 {
        struct tegra_gpio_info *tgi;
@@ -661,6 +672,7 @@ static int tegra_gpio_probe(struct platform_device *pdev)
                bank = &tgi->bank_info[GPIO_BANK(gpio)];
 
                irq_set_chip_data(irq, bank);
+               irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class);
                irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq);
        }
 
@@ -671,7 +683,7 @@ static int tegra_gpio_probe(struct platform_device *pdev)
                                                 tegra_gpio_irq_handler, bank);
 
                for (j = 0; j < 4; j++) {
-                       spin_lock_init(&bank->lvl_lock[j]);
+                       raw_spin_lock_init(&bank->lvl_lock[j]);
                        spin_lock_init(&bank->dbc_lock[j]);
                }
        }
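
Two related fixes land in the tegra hunks above: lvl_lock becomes a raw_spinlock_t because it nests inside the raw locks the IRQ core already holds, and each interrupt gets dedicated lockdep classes via irq_set_lockdep_class() so nested irqchip usage does not trigger false lockdep reports. A minimal sketch of the class setup, not part of this patch (example_* names are hypothetical; needs <linux/irq.h>):

static struct lock_class_key example_lock_class;
static struct lock_class_key example_request_class;

static void example_setup_irq(unsigned int irq, void *chip_data)
{
        irq_set_chip_data(irq, chip_data);
        /* Distinct classes keep lockdep from conflating nested irqchips. */
        irq_set_lockdep_class(irq, &example_lock_class,
                              &example_request_class);
}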
diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index 9500074..286e0b1 100644 (file)
@@ -444,6 +444,16 @@ static int tegra186_irq_set_wake(struct irq_data *data, unsigned int on)
        return 0;
 }
 
+static int tegra186_irq_set_affinity(struct irq_data *data,
+                                    const struct cpumask *dest,
+                                    bool force)
+{
+       if (data->parent_data)
+               return irq_chip_set_affinity_parent(data, dest, force);
+
+       return -EINVAL;
+}
+
 static void tegra186_gpio_irq(struct irq_desc *desc)
 {
        struct tegra_gpio *gpio = irq_desc_get_handler_data(desc);
@@ -690,6 +700,7 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
        gpio->intc.irq_unmask = tegra186_irq_unmask;
        gpio->intc.irq_set_type = tegra186_irq_set_type;
        gpio->intc.irq_set_wake = tegra186_irq_set_wake;
+       gpio->intc.irq_set_affinity = tegra186_irq_set_affinity;
 
        irq = &gpio->gpio.irq;
        irq->chip = &gpio->intc;
diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c
index 67f9f82..be53938 100644 (file)
@@ -6,13 +6,14 @@
  */
 
 #include <linux/bitops.h>
-#include <linux/init.h>
+#include <linux/clk.h>
 #include <linux/errno.h>
+#include <linux/gpio/driver.h>
+#include <linux/init.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
-#include <linux/io.h>
-#include <linux/gpio/driver.h>
 #include <linux/slab.h>
 
 /* Register Offset Definitions */
@@ -38,6 +39,7 @@
  * @gpio_state: GPIO state shadow register
  * @gpio_dir: GPIO direction shadow register
  * @gpio_lock: Lock used for synchronization
+ * @clk: clock resource for this driver
  */
 struct xgpio_instance {
        struct gpio_chip gc;
@@ -46,6 +48,7 @@ struct xgpio_instance {
        u32 gpio_state[2];
        u32 gpio_dir[2];
        spinlock_t gpio_lock[2];
+       struct clk *clk;
 };
 
 static inline int xgpio_index(struct xgpio_instance *chip, int gpio)
@@ -257,6 +260,23 @@ static void xgpio_save_regs(struct xgpio_instance *chip)
 }
 
 /**
+ * xgpio_remove - Remove method for the GPIO device.
+ * @pdev: pointer to the platform device
+ *
+ * This function removes gpiochips and frees all the allocated resources.
+ *
+ * Return: 0 always
+ */
+static int xgpio_remove(struct platform_device *pdev)
+{
+       struct xgpio_instance *gpio = platform_get_drvdata(pdev);
+
+       clk_disable_unprepare(gpio->clk);
+
+       return 0;
+}
+
+/**
  * xgpio_of_probe - Probe method for the GPIO device.
  * @pdev: pointer to the platform device
  *
@@ -278,7 +298,8 @@ static int xgpio_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, chip);
 
        /* Update GPIO state shadow register with default value */
-       of_property_read_u32(np, "xlnx,dout-default", &chip->gpio_state[0]);
+       if (of_property_read_u32(np, "xlnx,dout-default", &chip->gpio_state[0]))
+               chip->gpio_state[0] = 0x0;
 
        /* Update GPIO direction shadow register with default value */
        if (of_property_read_u32(np, "xlnx,tri-default", &chip->gpio_dir[0]))
@@ -298,8 +319,9 @@ static int xgpio_probe(struct platform_device *pdev)
 
        if (is_dual) {
                /* Update GPIO state shadow register with default value */
-               of_property_read_u32(np, "xlnx,dout-default-2",
-                                    &chip->gpio_state[1]);
+               if (of_property_read_u32(np, "xlnx,dout-default-2",
+                                        &chip->gpio_state[1]))
+                       chip->gpio_state[1] = 0x0;
 
                /* Update GPIO direction shadow register with default value */
                if (of_property_read_u32(np, "xlnx,tri-default-2",
@@ -334,11 +356,25 @@ static int xgpio_probe(struct platform_device *pdev)
                return PTR_ERR(chip->regs);
        }
 
+       chip->clk = devm_clk_get_optional(&pdev->dev, NULL);
+       if (IS_ERR(chip->clk)) {
+               if (PTR_ERR(chip->clk) != -EPROBE_DEFER)
+                       dev_dbg(&pdev->dev, "Input clock not found\n");
+               return PTR_ERR(chip->clk);
+       }
+
+       status = clk_prepare_enable(chip->clk);
+       if (status < 0) {
+               dev_err(&pdev->dev, "Failed to prepare clk\n");
+               return status;
+       }
+
        xgpio_save_regs(chip);
 
        status = devm_gpiochip_add_data(&pdev->dev, &chip->gc, chip);
        if (status) {
                dev_err(&pdev->dev, "failed to add GPIO chip\n");
+               clk_disable_unprepare(chip->clk);
                return status;
        }
 
@@ -354,6 +390,7 @@ MODULE_DEVICE_TABLE(of, xgpio_of_match);
 
 static struct platform_driver xgpio_plat_driver = {
        .probe          = xgpio_probe,
+       .remove         = xgpio_remove,
        .driver         = {
                        .name = "gpio-xilinx",
                        .of_match_table = xgpio_of_match,
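Worth noting about the error handling above: the gpiochip is registered with devm_gpiochip_add_data() while the clock is released in a hand-rolled xgpio_remove(), so the probe failure path needs the explicit clk_disable_unprepare() call. A sketch of an alternative that hands the clock to devres as well (an illustration under that assumption, not what this patch does), using devm_add_action_or_reset():

/* Hypothetical fully-devres variant: the action is registered before
 * devm_gpiochip_add_data(), and devres unwinds in reverse order, so on
 * unbind the gpiochip is removed first and the clock gated afterwards.
 * No .remove callback or manual unwind in probe is needed.
 */
static void xgpio_clk_disable(void *clk)
{
        clk_disable_unprepare(clk);
}

/* in probe, right after clk_prepare_enable(chip->clk) succeeds: */
status = devm_add_action_or_reset(&pdev->dev, xgpio_clk_disable, chip->clk);
if (status)
        return status;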
index e2cac12..49c878c 100644 (file)
@@ -186,15 +186,7 @@ static int xra1403_probe(struct spi_device *spi)
                return ret;
        }
 
-       ret = devm_gpiochip_add_data(&spi->dev, &xra->chip, xra);
-       if (ret < 0) {
-               dev_err(&spi->dev, "Unable to register gpiochip\n");
-               return ret;
-       }
-
-       spi_set_drvdata(spi, xra);
-
-       return 0;
+       return devm_gpiochip_add_data(&spi->dev, &xra->chip, xra);
 }
 
 static const struct spi_device_id xra1403_ids[] = {
index 834a12f..e37a57d 100644 (file)
@@ -205,6 +205,68 @@ static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio)
                acpi_gpiochip_request_irq(acpi_gpio, event);
 }
 
+static enum gpiod_flags
+acpi_gpio_to_gpiod_flags(const struct acpi_resource_gpio *agpio, int polarity)
+{
+       /* GpioInt() implies input configuration */
+       if (agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT)
+               return GPIOD_IN;
+
+       switch (agpio->io_restriction) {
+       case ACPI_IO_RESTRICT_INPUT:
+               return GPIOD_IN;
+       case ACPI_IO_RESTRICT_OUTPUT:
+               /*
+                * ACPI GPIO resources don't contain an initial value for the
+                * GPIO. Therefore we deduce that value from the pull field
+                * and the polarity instead. If the pin is pulled up we assume
+                * the default to be high, if it is pulled down we assume the
+                * default to be low, otherwise we leave the pin untouched.
+                * For active-low polarity the values are switched. See also
+                * Documentation/firmware-guide/acpi/gpio-properties.rst.
+                */
+               switch (agpio->pin_config) {
+               case ACPI_PIN_CONFIG_PULLUP:
+                       return polarity == GPIO_ACTIVE_LOW ? GPIOD_OUT_LOW : GPIOD_OUT_HIGH;
+               case ACPI_PIN_CONFIG_PULLDOWN:
+                       return polarity == GPIO_ACTIVE_LOW ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
+               default:
+                       break;
+               }
+               break;
+       default:
+               break;
+       }
+
+       /*
+        * Assume that the BIOS has configured the direction and pull
+        * accordingly.
+        */
+       return GPIOD_ASIS;
+}
+
+static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip,
+                                               struct acpi_resource_gpio *agpio,
+                                               unsigned int index,
+                                               const char *label)
+{
+       int polarity = GPIO_ACTIVE_HIGH;
+       enum gpiod_flags flags = acpi_gpio_to_gpiod_flags(agpio, polarity);
+       unsigned int pin = agpio->pin_table[index];
+       struct gpio_desc *desc;
+       int ret;
+
+       desc = gpiochip_request_own_desc(chip, pin, label, polarity, flags);
+       if (IS_ERR(desc))
+               return desc;
+
+       ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout);
+       if (ret)
+               gpiochip_free_own_desc(desc);
+
+       return ret ? ERR_PTR(ret) : desc;
+}
+
 static bool acpi_gpio_in_ignore_list(const char *controller_in, int pin_in)
 {
        const char *controller, *pin_str;
@@ -290,8 +352,7 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares,
        if (!handler)
                return AE_OK;
 
-       desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event",
-                                        GPIO_ACTIVE_HIGH, GPIOD_IN);
+       desc = acpi_request_own_gpiod(chip, agpio, 0, "ACPI:Event");
        if (IS_ERR(desc)) {
                dev_err(chip->parent,
                        "Failed to request GPIO for pin 0x%04X, err %ld\n",
@@ -526,39 +587,6 @@ static bool acpi_get_driver_gpio_data(struct acpi_device *adev,
        return false;
 }
 
-static enum gpiod_flags
-acpi_gpio_to_gpiod_flags(const struct acpi_resource_gpio *agpio)
-{
-       switch (agpio->io_restriction) {
-       case ACPI_IO_RESTRICT_INPUT:
-               return GPIOD_IN;
-       case ACPI_IO_RESTRICT_OUTPUT:
-               /*
-                * ACPI GPIO resources don't contain an initial value for the
-                * GPIO. Therefore we deduce that value from the pull field
-                * instead. If the pin is pulled up we assume default to be
-                * high, if it is pulled down we assume default to be low,
-                * otherwise we leave pin untouched.
-                */
-               switch (agpio->pin_config) {
-               case ACPI_PIN_CONFIG_PULLUP:
-                       return GPIOD_OUT_HIGH;
-               case ACPI_PIN_CONFIG_PULLDOWN:
-                       return GPIOD_OUT_LOW;
-               default:
-                       break;
-               }
-       default:
-               break;
-       }
-
-       /*
-        * Assume that the BIOS has configured the direction and pull
-        * accordingly.
-        */
-       return GPIOD_ASIS;
-}
-
 static int
 __acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, enum gpiod_flags update)
 {
@@ -633,7 +661,7 @@ int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags,
 struct acpi_gpio_lookup {
        struct acpi_gpio_info info;
        int index;
-       int pin_index;
+       u16 pin_index;
        bool active_low;
        struct gpio_desc *desc;
        int n;
@@ -649,7 +677,7 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
        if (!lookup->desc) {
                const struct acpi_resource_gpio *agpio = &ares->data.gpio;
                bool gpioint = agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT;
-               int pin_index;
+               u16 pin_index;
 
                if (lookup->info.quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint)
                        lookup->index++;
@@ -664,6 +692,7 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
                lookup->desc = acpi_get_gpiod(agpio->resource_source.string_ptr,
                                              agpio->pin_table[pin_index]);
                lookup->info.pin_config = agpio->pin_config;
+               lookup->info.debounce = agpio->debounce_timeout;
                lookup->info.gpioint = gpioint;
 
                /*
@@ -674,13 +703,13 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
                 * - ACPI_ACTIVE_HIGH == GPIO_ACTIVE_HIGH
                 */
                if (lookup->info.gpioint) {
-                       lookup->info.flags = GPIOD_IN;
                        lookup->info.polarity = agpio->polarity;
                        lookup->info.triggering = agpio->triggering;
                } else {
-                       lookup->info.flags = acpi_gpio_to_gpiod_flags(agpio);
                        lookup->info.polarity = lookup->active_low;
                }
+
+               lookup->info.flags = acpi_gpio_to_gpiod_flags(agpio, lookup->info.polarity);
        }
 
        return 1;
@@ -794,7 +823,7 @@ static struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev,
                if (ret)
                        return ERR_PTR(ret);
 
-               dev_dbg(&adev->dev, "GPIO: _DSD returned %s %d %d %u\n",
+               dev_dbg(&adev->dev, "GPIO: _DSD returned %s %d %u %u\n",
                        dev_name(&lookup.info.adev->dev), lookup.index,
                        lookup.pin_index, lookup.active_low);
        } else {
@@ -942,6 +971,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
 
                if (info.gpioint && idx++ == index) {
                        unsigned long lflags = GPIO_LOOKUP_FLAGS_DEFAULT;
+                       enum gpiod_flags dflags = GPIOD_ASIS;
                        char label[32];
                        int irq;
 
@@ -952,11 +982,18 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
                        if (irq < 0)
                                return irq;
 
+                       acpi_gpio_update_gpiod_flags(&dflags, &info);
+                       acpi_gpio_update_gpiod_lookup_flags(&lflags, &info);
+
                        snprintf(label, sizeof(label), "GpioInt() %d", index);
-                       ret = gpiod_configure_flags(desc, label, lflags, info.flags);
+                       ret = gpiod_configure_flags(desc, label, lflags, dflags);
                        if (ret < 0)
                                return ret;
 
+                       ret = gpio_set_debounce_timeout(desc, info.debounce);
+                       if (ret)
+                               return ret;
+
                        irq_flags = acpi_dev_get_irq_type(info.triggering,
                                                          info.polarity);
 
@@ -982,7 +1019,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
        struct gpio_chip *chip = achip->chip;
        struct acpi_resource_gpio *agpio;
        struct acpi_resource *ares;
-       int pin_index = (int)address;
+       u16 pin_index = address;
        acpi_status status;
        int length;
        int i;
@@ -1005,7 +1042,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
                return AE_BAD_PARAMETER;
        }
 
-       length = min(agpio->pin_table_length, (u16)(pin_index + bits));
+       length = min_t(u16, agpio->pin_table_length, pin_index + bits);
        for (i = pin_index; i < length; ++i) {
                int pin = agpio->pin_table[i];
                struct acpi_gpio_connection *conn;
@@ -1042,23 +1079,18 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
                }
 
                if (!found) {
-                       enum gpiod_flags flags = acpi_gpio_to_gpiod_flags(agpio);
-                       const char *label = "ACPI:OpRegion";
-
-                       desc = gpiochip_request_own_desc(chip, pin, label,
-                                                        GPIO_ACTIVE_HIGH,
-                                                        flags);
+                       desc = acpi_request_own_gpiod(chip, agpio, i, "ACPI:OpRegion");
                        if (IS_ERR(desc)) {
-                               status = AE_ERROR;
                                mutex_unlock(&achip->conn_lock);
+                               status = AE_ERROR;
                                goto out;
                        }
 
                        conn = kzalloc(sizeof(*conn), GFP_KERNEL);
                        if (!conn) {
-                               status = AE_NO_MEMORY;
                                gpiochip_free_own_desc(desc);
                                mutex_unlock(&achip->conn_lock);
+                               status = AE_NO_MEMORY;
                                goto out;
                        }
 
@@ -1070,8 +1102,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
                mutex_unlock(&achip->conn_lock);
 
                if (function == ACPI_WRITE)
-                       gpiod_set_raw_value_cansleep(desc,
-                                                    !!((1 << i) & *value));
+                       gpiod_set_raw_value_cansleep(desc, !!(*value & BIT(i)));
                else
                        *value |= (u64)gpiod_get_raw_value_cansleep(desc) << i;
        }
@@ -1132,7 +1163,7 @@ acpi_gpiochip_parse_own_gpio(struct acpi_gpio_chip *achip,
        int ret;
 
        *lflags = GPIO_LOOKUP_FLAGS_DEFAULT;
-       *dflags = 0;
+       *dflags = GPIOD_ASIS;
        *name = NULL;
 
        ret = fwnode_property_read_u32_array(fwnode, "gpios", gpios,
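For an output-restricted GpioIo() resource, the reworked acpi_gpio_to_gpiod_flags() therefore resolves to the following pull/polarity mapping (GpioInt() and input-restricted resources always yield GPIOD_IN, and anything else falls back to GPIOD_ASIS):

  pin_config       active-high       active-low
  ----------       -----------       ----------
  PULLUP           GPIOD_OUT_HIGH    GPIOD_OUT_LOW
  PULLDOWN         GPIOD_OUT_LOW     GPIOD_OUT_HIGH
  none/other       GPIOD_ASIS        GPIOD_ASIS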
index 1c6d65c..e2edb63 100644 (file)
@@ -18,6 +18,7 @@ struct acpi_device;
  * @pin_config: pin bias as provided by ACPI
  * @polarity: interrupt polarity as provided by ACPI
  * @triggering: triggering type as provided by ACPI
+ * @debounce: debounce timeout as provided by ACPI
  * @quirks: Linux specific quirks as provided by struct acpi_gpio_mapping
  */
 struct acpi_gpio_info {
@@ -27,6 +28,7 @@ struct acpi_gpio_info {
        int pin_config;
        int polarity;
        int triggering;
+       unsigned int debounce;
        unsigned int quirks;
 };
 
index e9faeaf..12b679c 100644 (file)
@@ -428,6 +428,12 @@ struct line {
         */
        struct linereq *req;
        unsigned int irq;
+       /*
+        * eflags is set by edge_detector_setup(), edge_detector_stop() and
+        * edge_detector_update(), which are themselves mutually exclusive,
+        * and is accessed by edge_irq_thread() and debounce_work_func(),
+        * which can both live with a slightly stale value.
+        */
        u64 eflags;
        /*
         * timestamp_ns and req_seqno are accessed only by
@@ -504,11 +510,14 @@ struct linereq {
        (GPIO_V2_LINE_FLAG_EDGE_RISING | \
         GPIO_V2_LINE_FLAG_EDGE_FALLING)
 
+#define GPIO_V2_LINE_FLAG_EDGE_BOTH GPIO_V2_LINE_EDGE_FLAGS
+
 #define GPIO_V2_LINE_VALID_FLAGS \
        (GPIO_V2_LINE_FLAG_ACTIVE_LOW | \
         GPIO_V2_LINE_DIRECTION_FLAGS | \
         GPIO_V2_LINE_DRIVE_FLAGS | \
         GPIO_V2_LINE_EDGE_FLAGS | \
+        GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME | \
         GPIO_V2_LINE_BIAS_FLAGS)
 
 static void linereq_put_event(struct linereq *lr,
@@ -529,11 +538,20 @@ static void linereq_put_event(struct linereq *lr,
                pr_debug_ratelimited("event FIFO is full - event dropped\n");
 }
 
+static u64 line_event_timestamp(struct line *line)
+{
+       if (test_bit(FLAG_EVENT_CLOCK_REALTIME, &line->desc->flags))
+               return ktime_get_real_ns();
+
+       return ktime_get_ns();
+}
+
 static irqreturn_t edge_irq_thread(int irq, void *p)
 {
        struct line *line = p;
        struct linereq *lr = line->req;
        struct gpio_v2_line_event le;
+       u64 eflags;
 
        /* Do not leak kernel stack to userspace */
        memset(&le, 0, sizeof(le));
@@ -546,14 +564,14 @@ static irqreturn_t edge_irq_thread(int irq, void *p)
                 * which case we didn't get the timestamp from
                 * edge_irq_handler().
                 */
-               le.timestamp_ns = ktime_get_ns();
+               le.timestamp_ns = line_event_timestamp(line);
                if (lr->num_lines != 1)
                        line->req_seqno = atomic_inc_return(&lr->seqno);
        }
        line->timestamp_ns = 0;
 
-       if (line->eflags == (GPIO_V2_LINE_FLAG_EDGE_RISING |
-                            GPIO_V2_LINE_FLAG_EDGE_FALLING)) {
+       eflags = READ_ONCE(line->eflags);
+       if (eflags == GPIO_V2_LINE_FLAG_EDGE_BOTH) {
                int level = gpiod_get_value_cansleep(line->desc);
 
                if (level)
@@ -562,10 +580,10 @@ static irqreturn_t edge_irq_thread(int irq, void *p)
                else
                        /* Emit high-to-low event */
                        le.id = GPIO_V2_LINE_EVENT_FALLING_EDGE;
-       } else if (line->eflags == GPIO_V2_LINE_FLAG_EDGE_RISING) {
+       } else if (eflags == GPIO_V2_LINE_FLAG_EDGE_RISING) {
                /* Emit low-to-high event */
                le.id = GPIO_V2_LINE_EVENT_RISING_EDGE;
-       } else if (line->eflags == GPIO_V2_LINE_FLAG_EDGE_FALLING) {
+       } else if (eflags == GPIO_V2_LINE_FLAG_EDGE_FALLING) {
                /* Emit high-to-low event */
                le.id = GPIO_V2_LINE_EVENT_FALLING_EDGE;
        } else {
@@ -590,7 +608,7 @@ static irqreturn_t edge_irq_handler(int irq, void *p)
         * Just store the timestamp in hardirq context so we get it as
         * close in time as possible to the actual event.
         */
-       line->timestamp_ns = ktime_get_ns();
+       line->timestamp_ns = line_event_timestamp(line);
 
        if (lr->num_lines != 1)
                line->req_seqno = atomic_inc_return(&lr->seqno);
@@ -634,6 +652,7 @@ static void debounce_work_func(struct work_struct *work)
        struct line *line = container_of(work, struct line, work.work);
        struct linereq *lr;
        int level;
+       u64 eflags;
 
        level = gpiod_get_raw_value_cansleep(line->desc);
        if (level < 0) {
@@ -647,7 +666,8 @@ static void debounce_work_func(struct work_struct *work)
        WRITE_ONCE(line->level, level);
 
        /* -- edge detection -- */
-       if (!line->eflags)
+       eflags = READ_ONCE(line->eflags);
+       if (!eflags)
                return;
 
        /* switch from physical level to logical - if they differ */
@@ -655,15 +675,15 @@ static void debounce_work_func(struct work_struct *work)
                level = !level;
 
        /* ignore edges that are not being monitored */
-       if (((line->eflags == GPIO_V2_LINE_FLAG_EDGE_RISING) && !level) ||
-           ((line->eflags == GPIO_V2_LINE_FLAG_EDGE_FALLING) && level))
+       if (((eflags == GPIO_V2_LINE_FLAG_EDGE_RISING) && !level) ||
+           ((eflags == GPIO_V2_LINE_FLAG_EDGE_FALLING) && level))
                return;
 
        /* Do not leak kernel stack to userspace */
        memset(&le, 0, sizeof(le));
 
        lr = line->req;
-       le.timestamp_ns = ktime_get_ns();
+       le.timestamp_ns = line_event_timestamp(line);
        le.offset = gpio_chip_hwgpio(line->desc);
        line->line_seqno++;
        le.line_seqno = line->line_seqno;
@@ -755,7 +775,7 @@ static void edge_detector_stop(struct line *line)
 
        cancel_delayed_work_sync(&line->work);
        WRITE_ONCE(line->sw_debounced, 0);
-       line->eflags = 0;
+       WRITE_ONCE(line->eflags, 0);
        /* do not change line->level - see comment in debounced_value() */
 }
 
@@ -774,7 +794,7 @@ static int edge_detector_setup(struct line *line,
                if (ret)
                        return ret;
        }
-       line->eflags = eflags;
+       WRITE_ONCE(line->eflags, eflags);
        if (gpio_v2_line_config_debounced(lc, line_idx)) {
                debounce_period_us = gpio_v2_line_config_debounce_period(lc, line_idx);
                ret = debounce_setup(line, debounce_period_us);
@@ -817,13 +837,13 @@ static int edge_detector_update(struct line *line,
        unsigned int debounce_period_us =
                gpio_v2_line_config_debounce_period(lc, line_idx);
 
-       if ((line->eflags == eflags) && !polarity_change &&
+       if ((READ_ONCE(line->eflags) == eflags) && !polarity_change &&
            (READ_ONCE(line->desc->debounce_period_us) == debounce_period_us))
                return 0;
 
        /* sw debounced and still will be...*/
        if (debounce_period_us && READ_ONCE(line->sw_debounced)) {
-               line->eflags = eflags;
+               WRITE_ONCE(line->eflags, eflags);
                WRITE_ONCE(line->desc->debounce_period_us, debounce_period_us);
                return 0;
        }
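These READ_ONCE()/WRITE_ONCE() conversions implement the scheme described in the new comment on struct line: the writers (edge_detector_setup/stop/update) are already mutually exclusive, the readers tolerate a slightly stale value, and the accessors only need to guarantee that the 64-bit loads and stores are not torn. The idiom in isolation, with hypothetical names:

/* Minimal sketch of the access pattern; struct foo and the helpers
 * are hypothetical, only the READ_ONCE/WRITE_ONCE idiom is the point.
 */
struct foo { u64 eflags; };

/* Writer -- serialized against other writers by the caller: */
static void foo_set_eflags(struct foo *f, u64 new_eflags)
{
        WRITE_ONCE(f->eflags, new_eflags);
}

/* Reader -- may observe a slightly stale value, never a torn one: */
static bool foo_rising_enabled(struct foo *f)
{
        return READ_ONCE(f->eflags) & GPIO_V2_LINE_FLAG_EDGE_RISING;
}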
@@ -967,6 +987,9 @@ static void gpio_v2_line_config_flags_to_desc_flags(u64 flags,
                   flags & GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN);
        assign_bit(FLAG_BIAS_DISABLE, flagsp,
                   flags & GPIO_V2_LINE_FLAG_BIAS_DISABLED);
+
+       assign_bit(FLAG_EVENT_CLOCK_REALTIME, flagsp,
+                  flags & GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME);
 }
 
 static long linereq_get_values(struct linereq *lr, void __user *ip)
@@ -1479,21 +1502,10 @@ static __poll_t lineevent_poll(struct file *file,
        return events;
 }
 
-static ssize_t lineevent_get_size(void)
-{
-#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
-       /* i386 has no padding after 'id' */
-       if (in_ia32_syscall()) {
-               struct compat_gpioeevent_data {
-                       compat_u64      timestamp;
-                       u32             id;
-               };
-
-               return sizeof(struct compat_gpioeevent_data);
-       }
-#endif
-       return sizeof(struct gpioevent_data);
-}
+struct compat_gpioeevent_data {
+       compat_u64      timestamp;
+       u32             id;
+};
 
 static ssize_t lineevent_read(struct file *file,
                              char __user *buf,
@@ -1515,7 +1527,10 @@ static ssize_t lineevent_read(struct file *file,
         * actual sizeof() and pass this as an argument to copy_to_user() to
         * drop unneeded bytes from the output.
         */
-       ge_size = lineevent_get_size();
+       if (compat_need_64bit_alignment_fixup())
+               ge_size = sizeof(struct compat_gpioeevent_data);
+       else
+               ge_size = sizeof(struct gpioevent_data);
        if (count < ge_size)
                return -EINVAL;
 
@@ -1910,6 +1925,7 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc,
            test_bit(FLAG_USED_AS_IRQ, &desc->flags) ||
            test_bit(FLAG_EXPORT, &desc->flags) ||
            test_bit(FLAG_SYSFS, &desc->flags) ||
+           !gpiochip_line_is_valid(gc, info->offset) ||
            !ok_for_pinctrl)
                info->flags |= GPIO_V2_LINE_FLAG_USED;
 
@@ -1938,6 +1954,9 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc,
        if (test_bit(FLAG_EDGE_FALLING, &desc->flags))
                info->flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING;
 
+       if (test_bit(FLAG_EVENT_CLOCK_REALTIME, &desc->flags))
+               info->flags |= GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME;
+
        debounce_period_us = READ_ONCE(desc->debounce_period_us);
        if (debounce_period_us) {
                info->attrs[num_attrs].id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE;
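From userspace, the new event clock is opt-in per line request. A hedged sketch of a consumer asking for CLOCK_REALTIME event timestamps through the v2 character-device uAPI (the chip path and line offset are placeholders):

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/gpio.h>

int watch_edges_realtime(void)
{
        struct gpio_v2_line_request req;
        struct gpio_v2_line_event ev;
        int fd = open("/dev/gpiochip0", O_RDONLY);   /* placeholder chip */

        if (fd < 0)
                return -1;

        memset(&req, 0, sizeof(req));
        req.offsets[0] = 5;                          /* placeholder line */
        req.num_lines = 1;
        strcpy(req.consumer, "rt-edge-demo");
        req.config.flags = GPIO_V2_LINE_FLAG_INPUT |
                           GPIO_V2_LINE_FLAG_EDGE_RISING |
                           GPIO_V2_LINE_FLAG_EDGE_FALLING |
                           GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME;

        if (ioctl(fd, GPIO_V2_GET_LINE_IOCTL, &req) < 0) {
                close(fd);
                return -1;
        }
        close(fd);

        /* ev.timestamp_ns is now CLOCK_REALTIME nanoseconds */
        while (read(req.fd, &ev, sizeof(ev)) == sizeof(ev))
                ; /* consume events */

        close(req.fd);
        return 0;
}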
index 7dbce4c..4a517e5 100644 (file)
@@ -246,10 +246,8 @@ struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev,
        struct gpio_desc *desc;
 
        desc = devm_gpiod_get_index(dev, con_id, index, flags);
-       if (IS_ERR(desc)) {
-               if (PTR_ERR(desc) == -ENOENT)
-                       return NULL;
-       }
+       if (gpiod_not_found(desc))
+               return NULL;
 
        return desc;
 }
@@ -308,7 +306,7 @@ devm_gpiod_get_array_optional(struct device *dev, const char *con_id,
        struct gpio_descs *descs;
 
        descs = devm_gpiod_get_array(dev, con_id, flags);
-       if (PTR_ERR(descs) == -ENOENT)
+       if (gpiod_not_found(descs))
                return NULL;
 
        return descs;
@@ -479,9 +477,9 @@ void devm_gpio_free(struct device *dev, unsigned int gpio)
 }
 EXPORT_SYMBOL_GPL(devm_gpio_free);
 
-static void devm_gpio_chip_release(struct device *dev, void *res)
+static void devm_gpio_chip_release(void *data)
 {
-       struct gpio_chip *gc = *(struct gpio_chip **)res;
+       struct gpio_chip *gc = data;
 
        gpiochip_remove(gc);
 }
@@ -507,23 +505,12 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, vo
                                    struct lock_class_key *lock_key,
                                    struct lock_class_key *request_key)
 {
-       struct gpio_chip **ptr;
        int ret;
 
-       ptr = devres_alloc(devm_gpio_chip_release, sizeof(*ptr),
-                            GFP_KERNEL);
-       if (!ptr)
-               return -ENOMEM;
-
        ret = gpiochip_add_data_with_key(gc, data, lock_key, request_key);
-       if (ret < 0) {
-               devres_free(ptr);
+       if (ret < 0)
                return ret;
-       }
 
-       *ptr = gc;
-       devres_add(dev, ptr);
-
-       return 0;
+       return devm_add_action_or_reset(dev, devm_gpio_chip_release, gc);
 }
 EXPORT_SYMBOL_GPL(devm_gpiochip_add_data_with_key);
index 2f895a2..b4a7111 100644 (file)
@@ -509,31 +509,31 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
                desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx,
                                                &of_flags);
 
-               if (!IS_ERR(desc) || PTR_ERR(desc) != -ENOENT)
+               if (!gpiod_not_found(desc))
                        break;
        }
 
-       if (PTR_ERR(desc) == -ENOENT) {
+       if (gpiod_not_found(desc)) {
                /* Special handling for SPI GPIOs if used */
                desc = of_find_spi_gpio(dev, con_id, &of_flags);
        }
 
-       if (PTR_ERR(desc) == -ENOENT) {
+       if (gpiod_not_found(desc)) {
                /* This quirk looks up flags and all */
                desc = of_find_spi_cs_gpio(dev, con_id, idx, flags);
                if (!IS_ERR(desc))
                        return desc;
        }
 
-       if (PTR_ERR(desc) == -ENOENT) {
+       if (gpiod_not_found(desc)) {
                /* Special handling for regulator GPIOs if used */
                desc = of_find_regulator_gpio(dev, con_id, &of_flags);
        }
 
-       if (PTR_ERR(desc) == -ENOENT)
+       if (gpiod_not_found(desc))
                desc = of_find_arizona_gpio(dev, con_id, &of_flags);
 
-       if (PTR_ERR(desc) == -ENOENT)
+       if (gpiod_not_found(desc))
                desc = of_find_usb_gpio(dev, con_id, &of_flags);
 
        if (IS_ERR(desc))
@@ -593,7 +593,7 @@ static struct gpio_desc *of_parse_own_gpio(struct device_node *np,
 
        xlate_flags = 0;
        *lflags = GPIO_LOOKUP_FLAGS_DEFAULT;
-       *dflags = 0;
+       *dflags = GPIOD_ASIS;
 
        ret = of_property_read_u32(chip_np, "#gpio-cells", &tmp);
        if (ret)
index 728f6c6..26c5466 100644 (file)
@@ -476,7 +476,7 @@ static ssize_t export_store(struct class *class,
         */
 
        status = gpiod_request(desc, "sysfs");
-       if (status < 0) {
+       if (status) {
                if (status == -EPROBE_DEFER)
                        status = -ENODEV;
                goto done;
index 6e3c4d7..b02cc2a 100644 (file)
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+
 #include <linux/bitmap.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -119,7 +120,7 @@ struct gpio_desc *gpio_to_desc(unsigned gpio)
        spin_unlock_irqrestore(&gpio_lock, flags);
 
        if (!gpio_is_valid(gpio))
-               WARN(1, "invalid GPIO %d\n", gpio);
+               pr_warn("invalid GPIO %d\n", gpio);
 
        return NULL;
 }
@@ -211,7 +212,7 @@ static int gpiochip_find_base(int ngpio)
 int gpiod_get_direction(struct gpio_desc *desc)
 {
        struct gpio_chip *gc;
-       unsigned offset;
+       unsigned int offset;
        int ret;
 
        gc = gpiod_to_chip(desc);
@@ -771,9 +772,11 @@ err_free_ida:
        ida_free(&gpio_ida, gdev->id);
 err_free_gdev:
        /* failures here can mean systems won't boot... */
-       pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__,
-              gdev->base, gdev->base + gdev->ngpio - 1,
-              gc->label ? : "generic", ret);
+       if (ret != -EPROBE_DEFER) {
+               pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__,
+                      gdev->base, gdev->base + gdev->ngpio - 1,
+                      gc->label ? : "generic", ret);
+       }
        kfree(gdev);
        return ret;
 }
@@ -936,67 +939,6 @@ bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc,
 }
 EXPORT_SYMBOL_GPL(gpiochip_irqchip_irq_valid);
 
-/**
- * gpiochip_set_cascaded_irqchip() - connects a cascaded irqchip to a gpiochip
- * @gc: the gpiochip to set the irqchip chain to
- * @parent_irq: the irq number corresponding to the parent IRQ for this
- * cascaded irqchip
- * @parent_handler: the parent interrupt handler for the accumulated IRQ
- * coming out of the gpiochip. If the interrupt is nested rather than
- * cascaded, pass NULL in this handler argument
- */
-static void gpiochip_set_cascaded_irqchip(struct gpio_chip *gc,
-                                         unsigned int parent_irq,
-                                         irq_flow_handler_t parent_handler)
-{
-       struct gpio_irq_chip *girq = &gc->irq;
-       struct device *dev = &gc->gpiodev->dev;
-
-       if (!girq->domain) {
-               chip_err(gc, "called %s before setting up irqchip\n",
-                        __func__);
-               return;
-       }
-
-       if (parent_handler) {
-               if (gc->can_sleep) {
-                       chip_err(gc,
-                                "you cannot have chained interrupts on a chip that may sleep\n");
-                       return;
-               }
-               girq->parents = devm_kcalloc(dev, 1,
-                                            sizeof(*girq->parents),
-                                            GFP_KERNEL);
-               if (!girq->parents) {
-                       chip_err(gc, "out of memory allocating parent IRQ\n");
-                       return;
-               }
-               girq->parents[0] = parent_irq;
-               girq->num_parents = 1;
-               /*
-                * The parent irqchip is already using the chip_data for this
-                * irqchip, so our callbacks simply use the handler_data.
-                */
-               irq_set_chained_handler_and_data(parent_irq, parent_handler,
-                                                gc);
-       }
-}
-
-/**
- * gpiochip_set_nested_irqchip() - connects a nested irqchip to a gpiochip
- * @gc: the gpiochip to set the irqchip nested handler to
- * @irqchip: the irqchip to nest to the gpiochip
- * @parent_irq: the irq number corresponding to the parent IRQ for this
- * nested irqchip
- */
-void gpiochip_set_nested_irqchip(struct gpio_chip *gc,
-                                struct irq_chip *irqchip,
-                                unsigned int parent_irq)
-{
-       gpiochip_set_cascaded_irqchip(gc, parent_irq, NULL);
-}
-EXPORT_SYMBOL_GPL(gpiochip_set_nested_irqchip);
-
 #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
 
 /**
@@ -1394,7 +1336,7 @@ void gpiochip_irq_domain_deactivate(struct irq_domain *domain,
 }
 EXPORT_SYMBOL_GPL(gpiochip_irq_domain_deactivate);
 
-static int gpiochip_to_irq(struct gpio_chip *gc, unsigned offset)
+static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset)
 {
        struct irq_domain *domain = gc->irq.domain;
 
@@ -1477,7 +1419,8 @@ static void gpiochip_set_irq_hooks(struct gpio_chip *gc)
        if (WARN_ON(gc->irq.irq_enable))
                return;
        /* Check if the irqchip already has this hook... */
-       if (irqchip->irq_enable == gpiochip_irq_enable) {
+       if (irqchip->irq_enable == gpiochip_irq_enable ||
+               irqchip->irq_mask == gpiochip_irq_mask) {
                /*
                 * ...and if so, give a gentle warning that this is bad
                 * practice.
@@ -1648,98 +1591,6 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gc)
 }
 
 /**
- * gpiochip_irqchip_add_key() - adds an irqchip to a gpiochip
- * @gc: the gpiochip to add the irqchip to
- * @irqchip: the irqchip to add to the gpiochip
- * @first_irq: if not dynamically assigned, the base (first) IRQ to
- * allocate gpiochip irqs from
- * @handler: the irq handler to use (often a predefined irq core function)
- * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE
- * to have the core avoid setting up any default type in the hardware.
- * @threaded: whether this irqchip uses a nested thread handler
- * @lock_key: lockdep class for IRQ lock
- * @request_key: lockdep class for IRQ request
- *
- * This function closely associates a certain irqchip with a certain
- * gpiochip, providing an irq domain to translate the local IRQs to
- * global irqs in the gpiolib core, and making sure that the gpiochip
- * is passed as chip data to all related functions. Driver callbacks
- * need to use gpiochip_get_data() to get their local state containers back
- * from the gpiochip passed as chip data. An irqdomain will be stored
- * in the gpiochip that shall be used by the driver to handle IRQ number
- * translation. The gpiochip will need to be initialized and registered
- * before calling this function.
- *
- * This function will handle two cell:ed simple IRQs and assumes all
- * the pins on the gpiochip can generate a unique IRQ. Everything else
- * need to be open coded.
- */
-int gpiochip_irqchip_add_key(struct gpio_chip *gc,
-                            struct irq_chip *irqchip,
-                            unsigned int first_irq,
-                            irq_flow_handler_t handler,
-                            unsigned int type,
-                            bool threaded,
-                            struct lock_class_key *lock_key,
-                            struct lock_class_key *request_key)
-{
-       struct device_node *of_node;
-
-       if (!gc || !irqchip)
-               return -EINVAL;
-
-       if (!gc->parent) {
-               chip_err(gc, "missing gpiochip .dev parent pointer\n");
-               return -EINVAL;
-       }
-       gc->irq.threaded = threaded;
-       of_node = gc->parent->of_node;
-#ifdef CONFIG_OF_GPIO
-       /*
-        * If the gpiochip has an assigned OF node this takes precedence
-        * FIXME: get rid of this and use gc->parent->of_node
-        * everywhere
-        */
-       if (gc->of_node)
-               of_node = gc->of_node;
-#endif
-       /*
-        * Specifying a default trigger is a terrible idea if DT or ACPI is
-        * used to configure the interrupts, as you may end-up with
-        * conflicting triggers. Tell the user, and reset to NONE.
-        */
-       if (WARN(of_node && type != IRQ_TYPE_NONE,
-                "%pOF: Ignoring %d default trigger\n", of_node, type))
-               type = IRQ_TYPE_NONE;
-       if (has_acpi_companion(gc->parent) && type != IRQ_TYPE_NONE) {
-               acpi_handle_warn(ACPI_HANDLE(gc->parent),
-                                "Ignoring %d default trigger\n", type);
-               type = IRQ_TYPE_NONE;
-       }
-
-       gc->irq.chip = irqchip;
-       gc->irq.handler = handler;
-       gc->irq.default_type = type;
-       gc->to_irq = gpiochip_to_irq;
-       gc->irq.lock_key = lock_key;
-       gc->irq.request_key = request_key;
-       gc->irq.domain = irq_domain_add_simple(of_node,
-                                       gc->ngpio, first_irq,
-                                       &gpiochip_domain_ops, gc);
-       if (!gc->irq.domain) {
-               gc->irq.chip = NULL;
-               return -EINVAL;
-       }
-
-       gpiochip_set_irq_hooks(gc);
-
-       acpi_gpiochip_request_interrupts(gc);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key);
-
-/**
  * gpiochip_irqchip_add_domain() - adds an irqdomain to a gpiochip
  * @gc: the gpiochip to add the irqchip to
  * @domain: the irqdomain to add to the gpiochip
@@ -1788,7 +1639,7 @@ static inline void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc)
  * @gc: the gpiochip owning the GPIO
  * @offset: the offset of the GPIO to request for GPIO function
  */
-int gpiochip_generic_request(struct gpio_chip *gc, unsigned offset)
+int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset)
 {
 #ifdef CONFIG_PINCTRL
        if (list_empty(&gc->gpiodev->pin_ranges))
@@ -1804,7 +1655,7 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_request);
  * @gc: the gpiochip to request the gpio function for
  * @offset: the offset of the GPIO to free from GPIO function
  */
-void gpiochip_generic_free(struct gpio_chip *gc, unsigned offset)
+void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset)
 {
 #ifdef CONFIG_PINCTRL
        if (list_empty(&gc->gpiodev->pin_ranges))
@@ -1821,7 +1672,7 @@ EXPORT_SYMBOL_GPL(gpiochip_generic_free);
  * @offset: the offset of the GPIO to apply the configuration
  * @config: the configuration to be applied
  */
-int gpiochip_generic_config(struct gpio_chip *gc, unsigned offset,
+int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset,
                            unsigned long config)
 {
        return pinctrl_gpio_set_config(gc->gpiodev->base + offset, config);
@@ -1985,11 +1836,9 @@ static int gpiod_request_commit(struct gpio_desc *desc, const char *label)
 
        if (test_and_set_bit(FLAG_REQUESTED, &desc->flags) == 0) {
                desc_set_label(desc, label ? : "?");
-               ret = 0;
        } else {
-               kfree_const(label);
                ret = -EBUSY;
-               goto done;
+               goto out_free_unlock;
        }
 
        if (gc->request) {
@@ -2002,11 +1851,10 @@ static int gpiod_request_commit(struct gpio_desc *desc, const char *label)
                        ret = -EINVAL;
                spin_lock_irqsave(&gpio_lock, flags);
 
-               if (ret < 0) {
+               if (ret) {
                        desc_set_label(desc, NULL);
-                       kfree_const(label);
                        clear_bit(FLAG_REQUESTED, &desc->flags);
-                       goto done;
+                       goto out_free_unlock;
                }
        }
        if (gc->get_direction) {
@@ -2015,8 +1863,12 @@ static int gpiod_request_commit(struct gpio_desc *desc, const char *label)
                gpiod_get_direction(desc);
                spin_lock_irqsave(&gpio_lock, flags);
        }
-done:
        spin_unlock_irqrestore(&gpio_lock, flags);
+       return 0;
+
+out_free_unlock:
+       spin_unlock_irqrestore(&gpio_lock, flags);
+       kfree_const(label);
        return ret;
 }
 
@@ -2068,7 +1920,7 @@ int gpiod_request(struct gpio_desc *desc, const char *label)
 
        if (try_module_get(gdev->owner)) {
                ret = gpiod_request_commit(desc, label);
-               if (ret < 0)
+               if (ret)
                        module_put(gdev->owner);
                else
                        get_device(&gdev->dev);
@@ -2151,7 +2003,7 @@ void gpiod_free(struct gpio_desc *desc)
  * help with diagnostics, and knowing that the signal is used as a GPIO
  * can help avoid accidentally multiplexing it to another controller.
  */
-const char *gpiochip_is_requested(struct gpio_chip *gc, unsigned offset)
+const char *gpiochip_is_requested(struct gpio_chip *gc, unsigned int offset)
 {
        struct gpio_desc *desc;
 
@@ -2251,30 +2103,49 @@ static int gpio_do_set_config(struct gpio_chip *gc, unsigned int offset,
        return gc->set_config(gc, offset, config);
 }
 
-static int gpio_set_config(struct gpio_desc *desc, enum pin_config_param mode)
+static int gpio_set_config_with_argument(struct gpio_desc *desc,
+                                        enum pin_config_param mode,
+                                        u32 argument)
 {
        struct gpio_chip *gc = desc->gdev->chip;
        unsigned long config;
-       unsigned arg;
+
+       config = pinconf_to_config_packed(mode, argument);
+       return gpio_do_set_config(gc, gpio_chip_hwgpio(desc), config);
+}
+
+static int gpio_set_config_with_argument_optional(struct gpio_desc *desc,
+                                                 enum pin_config_param mode,
+                                                 u32 argument)
+{
+       struct device *dev = &desc->gdev->dev;
+       int gpio = gpio_chip_hwgpio(desc);
+       int ret;
+
+       ret = gpio_set_config_with_argument(desc, mode, argument);
+       if (ret != -ENOTSUPP)
+               return ret;
 
        switch (mode) {
-       case PIN_CONFIG_BIAS_PULL_DOWN:
-       case PIN_CONFIG_BIAS_PULL_UP:
-               arg = 1;
+       case PIN_CONFIG_PERSIST_STATE:
+               dev_dbg(dev, "Persistence not supported for GPIO %d\n", gpio);
                break;
-
        default:
-               arg = 0;
+               break;
        }
 
-       config = PIN_CONF_PACKED(mode, arg);
-       return gpio_do_set_config(gc, gpio_chip_hwgpio(desc), config);
+       return 0;
+}
+
+static int gpio_set_config(struct gpio_desc *desc, enum pin_config_param mode)
+{
+       return gpio_set_config_with_argument(desc, mode, 0);
 }
 
 static int gpio_set_bias(struct gpio_desc *desc)
 {
-       int bias = 0;
-       int ret = 0;
+       enum pin_config_param bias;
+       unsigned int arg;
 
        if (test_bit(FLAG_BIAS_DISABLE, &desc->flags))
                bias = PIN_CONFIG_BIAS_DISABLE;
@@ -2282,13 +2153,28 @@ static int gpio_set_bias(struct gpio_desc *desc)
                bias = PIN_CONFIG_BIAS_PULL_UP;
        else if (test_bit(FLAG_PULL_DOWN, &desc->flags))
                bias = PIN_CONFIG_BIAS_PULL_DOWN;
+       else
+               return 0;
 
-       if (bias) {
-               ret = gpio_set_config(desc, bias);
-               if (ret != -ENOTSUPP)
-                       return ret;
+       switch (bias) {
+       case PIN_CONFIG_BIAS_PULL_DOWN:
+       case PIN_CONFIG_BIAS_PULL_UP:
+               arg = 1;
+               break;
+
+       default:
+               arg = 0;
+               break;
        }
-       return 0;
+
+       return gpio_set_config_with_argument_optional(desc, bias, arg);
+}
+
+int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce)
+{
+       return gpio_set_config_with_argument_optional(desc,
+                                                     PIN_CONFIG_INPUT_DEBOUNCE,
+                                                     debounce);
 }
 
 /**
@@ -2510,7 +2396,7 @@ EXPORT_SYMBOL_GPL(gpiod_set_config);
  * 0 on success, %-ENOTSUPP if the controller doesn't support setting the
  * debounce time.
  */
-int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
+int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce)
 {
        unsigned long config;
 
@@ -2529,11 +2415,6 @@ EXPORT_SYMBOL_GPL(gpiod_set_debounce);
  */
 int gpiod_set_transitory(struct gpio_desc *desc, bool transitory)
 {
-       struct gpio_chip *gc;
-       unsigned long packed;
-       int gpio;
-       int rc;
-
        VALIDATE_DESC(desc);
        /*
         * Handle FLAG_TRANSITORY first, enabling queries to gpiolib for
@@ -2542,21 +2423,9 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory)
        assign_bit(FLAG_TRANSITORY, &desc->flags, transitory);
 
        /* If the driver supports it, set the persistence state now */
-       gc = desc->gdev->chip;
-       if (!gc->set_config)
-               return 0;
-
-       packed = pinconf_to_config_packed(PIN_CONFIG_PERSIST_STATE,
-                                         !transitory);
-       gpio = gpio_chip_hwgpio(desc);
-       rc = gpio_do_set_config(gc, gpio, packed);
-       if (rc == -ENOTSUPP) {
-               dev_dbg(&desc->gdev->dev, "Persistence not supported for GPIO %d\n",
-                               gpio);
-               return 0;
-       }
-
-       return rc;
+       return gpio_set_config_with_argument_optional(desc,
+                                                     PIN_CONFIG_PERSIST_STATE,
+                                                     !transitory);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_transitory);
 
@@ -3784,7 +3653,7 @@ struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode,
 
                desc = fwnode_get_named_gpiod(fwnode, prop_name, index, flags,
                                              label);
-               if (!IS_ERR(desc) || (PTR_ERR(desc) != -ENOENT))
+               if (!gpiod_not_found(desc))
                        break;
        }
 
@@ -3960,7 +3829,7 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
         * Either we are not using DT or ACPI, or their lookup did not return
         * a result. In that case, use platform lookup as a fallback.
         */
-       if (!desc || desc == ERR_PTR(-ENOENT)) {
+       if (!desc || gpiod_not_found(desc)) {
                dev_dbg(dev, "using lookup tables for GPIO lookup\n");
                desc = gpiod_find(dev, con_id, idx, &lookupflags);
        }
@@ -3975,7 +3844,7 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
         * the device name as label
         */
        ret = gpiod_request(desc, con_id ? con_id : devname);
-       if (ret < 0) {
+       if (ret) {
                if (ret == -EBUSY && flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE) {
                        /*
                         * This happens when there are several consumers for
@@ -4095,10 +3964,8 @@ struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev,
        struct gpio_desc *desc;
 
        desc = gpiod_get_index(dev, con_id, index, flags);
-       if (IS_ERR(desc)) {
-               if (PTR_ERR(desc) == -ENOENT)
-                       return NULL;
-       }
+       if (gpiod_not_found(desc))
+               return NULL;
 
        return desc;
 }
@@ -4300,7 +4167,7 @@ struct gpio_descs *__must_check gpiod_get_array_optional(struct device *dev,
        struct gpio_descs *descs;
 
        descs = gpiod_get_array(dev, con_id, flags);
-       if (PTR_ERR(descs) == -ENOENT)
+       if (gpiod_not_found(descs))
                return NULL;
 
        return descs;
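The *_with_argument helpers above lean on the generic pinconf encoding, where a single unsigned long carries both the parameter and its argument: the enum pin_config_param occupies the low 8 bits and the argument the bits above it. A small illustration (the demo function and the 5000 us value are assumptions; the pack/unpack helpers are the existing pinconf-generic ones):

#include <linux/pinctrl/pinconf-generic.h>

static void pinconf_packing_demo(void)
{
        /* Pack PIN_CONFIG_INPUT_DEBOUNCE with a 5000 us argument ... */
        unsigned long config =
                pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, 5000);

        /* ... and unpack it again: */
        enum pin_config_param param = pinconf_to_config_param(config);
        u32 arg = pinconf_to_config_argument(config);   /* arg == 5000 */
}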
index b674b5b..30bc3f8 100644 (file)
@@ -116,6 +116,7 @@ struct gpio_desc {
 #define FLAG_BIAS_DISABLE    15        /* GPIO has pull disabled */
 #define FLAG_EDGE_RISING     16        /* GPIO CDEV detects rising edge events */
 #define FLAG_EDGE_FALLING    17        /* GPIO CDEV detects falling edge events */
+#define FLAG_EVENT_CLOCK_REALTIME      18 /* GPIO CDEV reports REALTIME timestamps in events */
 
        /* Connection label */
        const char              *label;
@@ -130,10 +131,13 @@ struct gpio_desc {
 #endif
 };
 
+#define gpiod_not_found(desc)          (IS_ERR(desc) && PTR_ERR(desc) == -ENOENT)
+
 int gpiod_request(struct gpio_desc *desc, const char *label);
 void gpiod_free(struct gpio_desc *desc);
 int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
                unsigned long lflags, enum gpiod_flags dflags);
+int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce);
 int gpiod_hog(struct gpio_desc *desc, const char *name,
                unsigned long lflags, enum gpiod_flags dflags);
 
index 6e29532..5993dd0 100644 (file)
@@ -1024,6 +1024,7 @@ struct amdgpu_device {
        /* enable runtime pm on the device */
        bool                            runpm;
        bool                            in_runpm;
+       bool                            has_pr3;
 
        bool                            pm_sysfs_en;
        bool                            ucode_sysfs_en;
@@ -1230,6 +1231,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
                                             const u32 *registers,
                                             const u32 array_size);
 
+bool amdgpu_device_supports_atpx(struct drm_device *dev);
 bool amdgpu_device_supports_boco(struct drm_device *dev);
 bool amdgpu_device_supports_baco(struct drm_device *dev);
 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
@@ -1280,6 +1282,8 @@ int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
 void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
 long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
                             unsigned long arg);
+int amdgpu_info_ioctl(struct drm_device *dev, void *data,
+                     struct drm_file *filp);
 
 /*
  * functions used by amdgpu_encoder.c
@@ -1311,11 +1315,11 @@ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);
 
 void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
                struct amdgpu_dm_backlight_caps *caps);
-bool amdgpu_acpi_is_s0ix_supported(void);
+bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev);
 #else
 static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
 static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
-static inline bool amdgpu_acpi_is_s0ix_supported(void) { return false; }
+static inline bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { return false; }
 #endif
 
 int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
index 4f4fda5..8155c54 100644 (file)
@@ -901,10 +901,12 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
  *
  * returns true if supported, false if not.
  */
-bool amdgpu_acpi_is_s0ix_supported(void)
+bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
 {
-       if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
-               return true;
+       if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
+               if (adev->flags & AMD_IS_APU)
+                       return true;
+       }
 
        return false;
 }
index 7791d07..2d991da 100644 (file)
@@ -1213,7 +1213,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
        ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
        if (ret) {
-               pr_debug("Insufficient system memory\n");
+               pr_debug("Insufficient memory\n");
                goto err_reserve_limit;
        }
 
index 79dd85f..7d2f7a2 100644 (file)
@@ -212,14 +212,14 @@ static DEVICE_ATTR(serial_number, S_IRUGO,
                amdgpu_device_get_serial_number, NULL);
 
 /**
- * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
+ * amdgpu_device_supports_atpx - Is the device a dGPU with HG/PX power control
  *
  * @dev: drm_device pointer
  *
  * Returns true if the device is a dGPU with HG/PX power control,
  * otherwise return false.
  */
-bool amdgpu_device_supports_boco(struct drm_device *dev)
+bool amdgpu_device_supports_atpx(struct drm_device *dev)
 {
        struct amdgpu_device *adev = drm_to_adev(dev);
 
@@ -229,6 +229,23 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
 }
 
 /**
+ * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device is a dGPU with ACPI power resources (_PR3),
+ * otherwise return false.
+ */
+bool amdgpu_device_supports_boco(struct drm_device *dev)
+{
+       struct amdgpu_device *adev = drm_to_adev(dev);
+
+       if (adev->has_pr3)
+               return true;
+       return false;
+}
+
+/**
  * amdgpu_device_supports_baco - Does the device support BACO
  *
  * @dev: drm_device pointer
@@ -1398,7 +1415,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
        struct drm_device *dev = pci_get_drvdata(pdev);
        int r;
 
-       if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
+       if (amdgpu_device_supports_atpx(dev) && state == VGA_SWITCHEROO_OFF)
                return;
 
        if (state == VGA_SWITCHEROO_ON) {
@@ -2650,7 +2667,7 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
 {
        int i, r;
 
-       if (!amdgpu_acpi_is_s0ix_supported() || amdgpu_in_reset(adev)) {
+       if (!amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) {
                amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
                amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
        }
@@ -3177,7 +3194,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        struct drm_device *ddev = adev_to_drm(adev);
        struct pci_dev *pdev = adev->pdev;
        int r, i;
-       bool boco = false;
+       bool atpx = false;
        u32 max_MBps;
 
        adev->shutdown = false;
@@ -3349,15 +3366,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
                vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
 
-       if (amdgpu_device_supports_boco(ddev))
-               boco = true;
+       if (amdgpu_device_supports_atpx(ddev))
+               atpx = true;
        if (amdgpu_has_atpx() &&
            (amdgpu_is_atpx_hybrid() ||
             amdgpu_has_atpx_dgpu_power_cntl()) &&
            !pci_is_thunderbolt_attached(adev->pdev))
                vga_switcheroo_register_client(adev->pdev,
-                                              &amdgpu_switcheroo_ops, boco);
-       if (boco)
+                                              &amdgpu_switcheroo_ops, atpx);
+       if (atpx)
                vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 
        if (amdgpu_emu_mode == 1) {
@@ -3540,7 +3557,7 @@ fence_driver_init:
 
 failed:
        amdgpu_vf_error_trans_all(adev);
-       if (boco)
+       if (atpx)
                vga_switcheroo_fini_domain_pm_ops(adev->dev);
 
 failed_unmap:
@@ -3604,7 +3621,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
             amdgpu_has_atpx_dgpu_power_cntl()) &&
            !pci_is_thunderbolt_attached(adev->pdev))
                vga_switcheroo_unregister_client(adev->pdev);
-       if (amdgpu_device_supports_boco(adev_to_drm(adev)))
+       if (amdgpu_device_supports_atpx(adev_to_drm(adev)))
                vga_switcheroo_fini_domain_pm_ops(adev->dev);
        if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
                vga_client_register(adev->pdev, NULL, NULL, NULL);
@@ -3710,7 +3727,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
        amdgpu_fence_driver_suspend(adev);
 
-       if (!amdgpu_acpi_is_s0ix_supported() || amdgpu_in_reset(adev))
+       if (!amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev))
                r = amdgpu_device_ip_suspend_phase2(adev);
        else
                amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
@@ -3744,7 +3761,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
                return 0;
 
-       if (amdgpu_acpi_is_s0ix_supported())
+       if (amdgpu_acpi_is_s0ix_supported(adev))
                amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
 
        /* post card */
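
Note: the reworked s0ix check now takes the device, so it can combine the
platform capability with a per-device condition. A sketch of its shape (the
CONFIG gate and exact body are assumptions; the real helper lives in
amdgpu_acpi.c):

bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
{
#if defined(CONFIG_AMD_PMC)
	/* platform must advertise low-power S0 and the device be an APU */
	if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
		return !!(adev->flags & AMD_IS_APU);
#endif
	return false;
}
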
index ebdab31..72efd57 100644 (file)
@@ -1340,7 +1340,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
        }
 
        adev->in_runpm = true;
-       if (amdgpu_device_supports_boco(drm_dev))
+       if (amdgpu_device_supports_atpx(drm_dev))
                drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
        drm_kms_helper_poll_disable(drm_dev);
 
@@ -1348,13 +1348,11 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
        if (ret)
                return ret;
 
-       if (amdgpu_device_supports_boco(drm_dev)) {
+       if (amdgpu_device_supports_atpx(drm_dev)) {
                /* Only need to handle PCI state in the driver for ATPX
                 * PCI core handles it for _PR3.
                 */
-               if (amdgpu_is_atpx_hybrid()) {
-                       pci_ignore_hotplug(pdev);
-               } else {
+               if (!amdgpu_is_atpx_hybrid()) {
                        amdgpu_device_cache_pci_state(pdev);
                        pci_disable_device(pdev);
                        pci_ignore_hotplug(pdev);
@@ -1378,28 +1376,31 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
        if (!adev->runpm)
                return -EINVAL;
 
-       if (amdgpu_device_supports_boco(drm_dev)) {
+       if (amdgpu_device_supports_atpx(drm_dev)) {
                drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
                /* Only need to handle PCI state in the driver for ATPX
                 * PCI core handles it for _PR3.
                 */
-               if (amdgpu_is_atpx_hybrid()) {
-                       pci_set_master(pdev);
-               } else {
+               if (!amdgpu_is_atpx_hybrid()) {
                        pci_set_power_state(pdev, PCI_D0);
                        amdgpu_device_load_pci_state(pdev);
                        ret = pci_enable_device(pdev);
                        if (ret)
                                return ret;
-                       pci_set_master(pdev);
                }
+               pci_set_master(pdev);
+       } else if (amdgpu_device_supports_boco(drm_dev)) {
+               /* Nothing for the driver to do here: the PCI core
+                * handles PCI state for _PR3 devices.
+                */
+               pci_set_master(pdev);
        } else if (amdgpu_device_supports_baco(drm_dev)) {
                amdgpu_device_baco_exit(drm_dev);
        }
        ret = amdgpu_device_resume(drm_dev, false);
        drm_kms_helper_poll_enable(drm_dev);
-       if (amdgpu_device_supports_boco(drm_dev))
+       if (amdgpu_device_supports_atpx(drm_dev))
                drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
        adev->in_runpm = false;
        return 0;
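
Note: condensing the resume-path decisions above into an outline (the atpx,
boco and baco locals are illustrative), pci_set_master() is now common to
both ATPX flavors while the manual PCI-state dance stays gated to legacy
(non-hybrid) ATPX:

if (atpx) {
	if (!atpx_hybrid) {		/* legacy ATPX: driver owns PCI state */
		pci_set_power_state(pdev, PCI_D0);
		amdgpu_device_load_pci_state(pdev);
		ret = pci_enable_device(pdev);
		if (ret)
			return ret;
	}
	pci_set_master(pdev);		/* needed for both ATPX flavors */
} else if (boco) {
	pci_set_master(pdev);		/* PCI core restored state for _PR3 */
} else if (baco) {
	amdgpu_device_baco_exit(drm_dev);
}
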
@@ -1533,8 +1534,6 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
        return 0;
 }
 
-int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
-
 const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
        DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
index 02af47d..c2ced5b 100644 (file)
@@ -496,13 +496,14 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
                break;
        }
 
-       if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE))
+       if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {
                size = 0;
-       else
+       } else {
                size = amdgpu_gmc_get_vbios_fb_size(adev);
 
-       if (adev->mman.keep_stolen_vga_memory)
-               size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
+               if (adev->mman.keep_stolen_vga_memory)
+                       size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
+       }
 
        /* set to 0 if the pre-OS buffer uses up most of vram */
        if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
index fc12fc7..b16b327 100644 (file)
@@ -133,6 +133,7 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
 int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
 {
        struct drm_device *dev;
+       struct pci_dev *parent;
        int r, acpi_status;
 
        dev = adev_to_drm(adev);
@@ -144,6 +145,9 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
            !pci_is_thunderbolt_attached(dev->pdev))
                flags |= AMD_IS_PX;
 
+       parent = pci_upstream_bridge(adev->pdev);
+       adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
+
        /* amdgpu_device_init should report only fatal error
         * like memory allocation failure or iomapping failure,
         * or memory manager initialization failure, it must
@@ -156,9 +160,14 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
                goto out;
        }
 
-       if (amdgpu_device_supports_boco(dev) &&
-           (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */
+       if (amdgpu_device_supports_atpx(dev) &&
+           (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */
+               adev->runpm = true;
+               dev_info(adev->dev, "Using ATPX for runtime pm\n");
+       } else if (amdgpu_device_supports_boco(dev) &&
+                  (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */
                adev->runpm = true;
+               dev_info(adev->dev, "Using BOCO for runtime pm\n");
        } else if (amdgpu_device_supports_baco(dev) &&
                   (amdgpu_runtime_pm != 0)) {
                switch (adev->asic_type) {
@@ -180,6 +189,8 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
                        adev->runpm = true;
                        break;
                }
+               if (adev->runpm)
+                       dev_info(adev->dev, "Using BACO for runtime pm\n");
        }
 
        /* Call ACPI methods: require modeset init
@@ -192,7 +203,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
 
        if (adev->runpm) {
                /* only need to skip on ATPX */
-               if (amdgpu_device_supports_boco(dev) &&
+               if (amdgpu_device_supports_atpx(dev) &&
                    !amdgpu_is_atpx_hybrid())
                        dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
                pm_runtime_use_autosuspend(dev->dev);
index 324d5e3..6752d8b 100644 (file)
@@ -358,10 +358,11 @@ TRACE_EVENT(amdgpu_vm_update_ptes,
                        }
        ),
        TP_printk("pid:%u vm_ctx:0x%llx start:0x%010llx end:0x%010llx,"
-                 " flags:0x%llx, incr:%llu, dst:\n%s", __entry->pid,
+                 " flags:0x%llx, incr:%llu, dst:\n%s%s", __entry->pid,
                  __entry->vm_ctx, __entry->start, __entry->end,
                  __entry->flags, __entry->incr,  __print_array(
-                 __get_dynamic_array(dst), __entry->nptes, 8))
+                 __get_dynamic_array(dst), min(__entry->nptes, 32u), 8),
+                 __entry->nptes > 32 ? "..." : "")
 );
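
Note: the min()/ellipsis idiom above caps the dumped PTE array at 32 entries
and flags truncation; a standalone userspace analogue (names illustrative):

#include <stdio.h>

static void print_capped(const unsigned long long *dst, unsigned int n,
			 unsigned int cap)
{
	unsigned int i, shown = n < cap ? n : cap;

	for (i = 0; i < shown; i++)
		printf("0x%llx ", dst[i]);
	printf("%s\n", n > cap ? "..." : "");
}
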
 
 TRACE_EVENT(amdgpu_vm_set_ptes,
index 7c5b60e..8b98967 100644 (file)
@@ -240,7 +240,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 
                version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
                version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
-               DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
+               DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
                        version_major, version_minor, family_id);
 
                /*
@@ -267,7 +267,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
                dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
                enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
                enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
-               DRM_INFO("Found UVD firmware ENC: %hu.%hu DEC: .%hu Family ID: %hu\n",
+               DRM_INFO("Found UVD firmware ENC: %u.%u DEC: .%u Family ID: %u\n",
                        enc_major, enc_minor, dec_minor, family_id);
 
                adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
index 9791a40..0d5284b 100644 (file)
@@ -179,7 +179,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
        version_major = (ucode_version >> 20) & 0xfff;
        version_minor = (ucode_version >> 8) & 0xfff;
        binary_id = ucode_version & 0xff;
-       DRM_INFO("Found VCE firmware Version: %hhd.%hhd Binary ID: %hhd\n",
+       DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
                version_major, version_minor, binary_id);
        adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
                                (binary_id << 8));
index 1c97244..4a77c74 100644 (file)
@@ -181,7 +181,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
                enc_major = fw_check;
                dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
                vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
-               DRM_INFO("Found VCN firmware Version ENC: %hu.%hu DEC: %hu VEP: %hu Revision: %hu\n",
+               DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
                        enc_major, enc_minor, dec_ver, vep, fw_rev);
        } else {
                unsigned int version_major, version_minor, family_id;
@@ -189,7 +189,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
                family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
                version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
                version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
-               DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
+               DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
                        version_major, version_minor, family_id);
        }
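
Note: the %hu -> %u changes in the UVD/VCE/VCN hunks fix format/argument
mismatches: the shift-and-mask results are unsigned int, and varargs promote
shorts to int anyway, so %hu draws -Wformat warnings. A minimal reproduction:

#include <stdio.h>

int main(void)
{
	unsigned int ucode_version = 0x01020003;
	unsigned int major = (ucode_version >> 24) & 0xff;
	unsigned int minor = (ucode_version >> 8) & 0xff;

	/* major/minor are unsigned int: %u matches, %hu would warn */
	printf("Version: %u.%u\n", major, minor);
	return 0;
}
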
 
index 092ff2c..f107385 100644 (file)
@@ -136,6 +136,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
                break;
        case CHIP_SIENNA_CICHLID:
        case CHIP_NAVY_FLOUNDER:
+       case CHIP_DIMGREY_CAVEFISH:
                mmhub_cid = mmhub_client_ids_sienna_cichlid[cid][rw];
                break;
        default:
index f5ce9a9..7767ccc 100644 (file)
@@ -187,7 +187,16 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 
 static int xgpu_ai_request_reset(struct amdgpu_device *adev)
 {
-       return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+       int ret, i = 0;
+
+       while (i < AI_MAILBOX_POLL_MSG_REP_MAX) {
+               ret = xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+               if (!ret)
+                       break;
+               i++;
+       }
+
+       return ret;
 }
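
Note: this hunk and its NV counterpart below wrap the reset request in the
same bounded-retry shape; generically (a sketch, assuming the callback
returns 0 on success):

static int request_with_retries(int (*send)(void), int max_tries)
{
	int ret = -EINVAL, i;

	for (i = 0; i < max_tries; i++) {
		ret = send();
		if (!ret)
			break;
	}
	return ret;
}
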
 
 static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev,
index 83b453f..5057263 100644 (file)
@@ -25,8 +25,9 @@
 #define __MXGPU_AI_H__
 
 #define AI_MAILBOX_POLL_ACK_TIMEDOUT   500
-#define AI_MAILBOX_POLL_MSG_TIMEDOUT   12000
+#define AI_MAILBOX_POLL_MSG_TIMEDOUT   6000
 #define AI_MAILBOX_POLL_FLR_TIMEDOUT   5000
+#define AI_MAILBOX_POLL_MSG_REP_MAX    11
 
 enum idh_request {
        IDH_REQ_GPU_INIT_ACCESS = 1,
index 666ed99..dd5c1e6 100644 (file)
@@ -200,7 +200,16 @@ static int xgpu_nv_send_access_requests(struct amdgpu_device *adev,
 
 static int xgpu_nv_request_reset(struct amdgpu_device *adev)
 {
-       return xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+       int ret, i = 0;
+
+       while (i < NV_MAILBOX_POLL_MSG_REP_MAX) {
+               ret = xgpu_nv_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+               if (!ret)
+                       break;
+               i++;
+       }
+
+       return ret;
 }
 
 static int xgpu_nv_request_full_gpu_access(struct amdgpu_device *adev,
index 52605e1..9f58086 100644 (file)
@@ -27,6 +27,7 @@
 #define NV_MAILBOX_POLL_ACK_TIMEDOUT   500
 #define NV_MAILBOX_POLL_MSG_TIMEDOUT   6000
 #define NV_MAILBOX_POLL_FLR_TIMEDOUT   5000
+#define NV_MAILBOX_POLL_MSG_REP_MAX    11
 
 enum idh_request {
        IDH_REQ_GPU_INIT_ACCESS = 1,
index ac02dd7..6bee367 100644 (file)
@@ -362,6 +362,7 @@ nv_asic_reset_method(struct amdgpu_device *adev)
        switch (adev->asic_type) {
        case CHIP_SIENNA_CICHLID:
        case CHIP_NAVY_FLOUNDER:
+       case CHIP_DIMGREY_CAVEFISH:
                return AMD_RESET_METHOD_MODE1;
        default:
                if (smu_baco_is_support(smu))
index 39e17aa..f1ba36a 100644 (file)
@@ -153,6 +153,9 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
 
+       if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_SIENNA_CICHLID))
+               return 0;
+
        DRM_DEBUG("\n");
 
        switch (adev->asic_type) {
@@ -807,6 +810,37 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
        return 0;
 }
 
+static int sdma_v5_2_soft_reset(void *handle)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       u32 grbm_soft_reset;
+       u32 tmp;
+       int i;
+
+       for (i = 0; i < adev->sdma.num_instances; i++) {
+               grbm_soft_reset = REG_SET_FIELD(0,
+                                               GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
+                                               1);
+               grbm_soft_reset <<= i;
+
+               tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+               tmp |= grbm_soft_reset;
+               DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+               WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+               tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+               udelay(50);
+
+               tmp &= ~grbm_soft_reset;
+               WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
+               tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
+
+               udelay(50);
+       }
+
+       return 0;
+}
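
Note: the new soft reset follows the usual assert/settle/deassert sequence,
with register read-backs to post each write; the generic shape (the I/O
helpers and the 50us settle time here are illustrative assumptions):

static void soft_reset_bit(void __iomem *reg, u32 mask)
{
	u32 tmp = readl(reg);

	writel(tmp | mask, reg);	/* assert reset */
	(void)readl(reg);		/* read back to post the write */
	udelay(50);

	writel(tmp & ~mask, reg);	/* deassert reset */
	(void)readl(reg);
	udelay(50);
}
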
+
 /**
  * sdma_v5_2_start - setup and start the async dma engines
  *
@@ -838,6 +872,7 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
                        msleep(1000);
        }
 
+       sdma_v5_2_soft_reset(adev);
        /* unhalt the MEs */
        sdma_v5_2_enable(adev, true);
        /* enable sdma ring preemption */
@@ -1366,13 +1401,6 @@ static int sdma_v5_2_wait_for_idle(void *handle)
        return -ETIMEDOUT;
 }
 
-static int sdma_v5_2_soft_reset(void *handle)
-{
-       /* todo */
-
-       return 0;
-}
-
 static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring)
 {
        int i, r = 0;
index b3672d1..e8fb10c 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: MIT
 #
-# Heterogenous system architecture configuration
+# Heterogeneous system architecture configuration
 #
 
 config HSA_AMD
index f0a6f66..e686ce2 100644 (file)
@@ -72,8 +72,8 @@ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
 {
        int i;
-       int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
-               + pipe * dqm->dev->shared_resources.num_queue_per_pipe;
+       int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
+               + pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
 
        /* queue is available for KFD usage if bit is 1 */
        for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
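
Note: the old expression mixed units (a pipe count plus a queue-scaled pipe
index); the fix scales the whole (mec, pipe) pair into queue-bit space. A
quick check with illustrative dimensions:

#include <stdio.h>

int main(void)
{
	/* illustrative dimensions, not taken from real hardware */
	const int num_pipe_per_mec = 4, num_queue_per_pipe = 8;
	const int mec = 1, pipe = 2;

	int old_off = mec * num_pipe_per_mec + pipe * num_queue_per_pipe;
	int new_off = (mec * num_pipe_per_mec + pipe) * num_queue_per_pipe;

	printf("old=%d new=%d\n", old_off, new_off);	/* old=20 new=48 */
	return 0;
}
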
index c238962..2c4dbde 100644 (file)
@@ -196,10 +196,6 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
 
 static int amdgpu_dm_connector_get_modes(struct drm_connector *connector);
 
-static int amdgpu_dm_atomic_commit(struct drm_device *dev,
-                                  struct drm_atomic_state *state,
-                                  bool nonblock);
-
 static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
 
 static int amdgpu_dm_atomic_check(struct drm_device *dev,
@@ -2212,7 +2208,7 @@ static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = {
        .get_format_info = amd_get_format_info,
        .output_poll_changed = drm_fb_helper_output_poll_changed,
        .atomic_check = amdgpu_dm_atomic_check,
-       .atomic_commit = amdgpu_dm_atomic_commit,
+       .atomic_commit = drm_atomic_helper_commit,
 };
 
 static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = {
@@ -5124,9 +5120,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
        int preferred_refresh = 0;
 #if defined(CONFIG_DRM_AMD_DC_DCN)
        struct dsc_dec_dpcd_caps dsc_caps;
-#endif
        uint32_t link_bandwidth_kbps;
-
+#endif
        struct dc_sink *sink = NULL;
        if (aconnector == NULL) {
                DRM_ERROR("aconnector is NULL!\n");
@@ -5208,11 +5203,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
                                      aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw,
                                      aconnector->dc_link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw,
                                      &dsc_caps);
-#endif
                link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
                                                             dc_link_get_link_cap(aconnector->dc_link));
 
-#if defined(CONFIG_DRM_AMD_DC_DCN)
                if (aconnector->dsc_settings.dsc_force_enable != DSC_CLK_FORCE_DISABLE && dsc_caps.is_dsc_supported) {
                        /* Set DSC policy according to dsc_clock_en */
                        dc_dsc_policy_set_enable_dsc_when_not_needed(
@@ -5349,7 +5342,7 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc)
 }
 
 #ifdef CONFIG_DEBUG_FS
-int amdgpu_dm_crtc_atomic_set_property(struct drm_crtc *crtc,
+static int amdgpu_dm_crtc_atomic_set_property(struct drm_crtc *crtc,
                                            struct drm_crtc_state *crtc_state,
                                            struct drm_property *property,
                                            uint64_t val)
@@ -5373,7 +5366,7 @@ int amdgpu_dm_crtc_atomic_set_property(struct drm_crtc *crtc,
        return 0;
 }
 
-int amdgpu_dm_crtc_atomic_get_property(struct drm_crtc *crtc,
+static int amdgpu_dm_crtc_atomic_get_property(struct drm_crtc *crtc,
                                            const struct drm_crtc_state *state,
                                            struct drm_property *property,
                                            uint64_t *val)
@@ -8070,20 +8063,6 @@ static void amdgpu_dm_crtc_copy_transient_flags(struct drm_crtc_state *crtc_stat
        stream_state->mode_changed = drm_atomic_crtc_needs_modeset(crtc_state);
 }
 
-static int amdgpu_dm_atomic_commit(struct drm_device *dev,
-                                  struct drm_atomic_state *state,
-                                  bool nonblock)
-{
-       /*
-        * Add check here for SoC's that support hardware cursor plane, to
-        * unset legacy_cursor_update
-        */
-
-       return drm_atomic_helper_commit(dev, state, nonblock);
-
-       /*TODO Handle EINTR, reenable IRQ*/
-}
-
 /**
  * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation.
  * @state: The atomic state to commit
index 0b31779..2ee6edb 100644 (file)
@@ -337,10 +337,29 @@ struct amdgpu_display_manager {
        const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box;
 
 #ifdef CONFIG_DEBUG_FS
-       /* set the crc calculation window*/
+       /**
+        * @crc_win_x_start_property:
+        *
+        * X start of the crc calculation window
+        */
        struct drm_property *crc_win_x_start_property;
+       /**
+        * @crc_win_y_start_property:
+        *
+        * Y start of the crc calculation window
+        */
        struct drm_property *crc_win_y_start_property;
+       /**
+        * @crc_win_x_end_property:
+        *
+        * X end of the crc calculation window
+        */
        struct drm_property *crc_win_x_end_property;
+       /**
+        * @crc_win_y_end_property:
+        *
+        * Y end of the crc calculation window
+        */
        struct drm_property *crc_win_y_end_property;
 #endif
        /**
index ff6db26..7b886a7 100644 (file)
@@ -81,6 +81,14 @@ const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc,
        return pipe_crc_sources;
 }
 
+static void amdgpu_dm_set_crc_window_default(struct dm_crtc_state *dm_crtc_state)
+{
+       dm_crtc_state->crc_window.x_start = 0;
+       dm_crtc_state->crc_window.y_start = 0;
+       dm_crtc_state->crc_window.x_end = 0;
+       dm_crtc_state->crc_window.y_end = 0;
+}
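
Note: a zeroed window is exactly what amdgpu_dm_crc_window_is_default()
looks for; its companion check is roughly (a sketch, assuming the four-field
window struct used above):

static bool crc_window_is_default(const struct dm_crtc_state *s)
{
	/* all-zero coordinates mean "no window": CRC the whole frame */
	return !s->crc_window.x_start && !s->crc_window.y_start &&
	       !s->crc_window.x_end && !s->crc_window.y_end;
}
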
+
 bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state)
 {
        bool ret = true;
@@ -141,7 +149,10 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
        mutex_lock(&adev->dm.dc_lock);
 
        /* Enable CRTC CRC generation if necessary. */
-       if (dm_is_crc_source_crtc(source)) {
+       if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) {
+               if (!enable)
+                       amdgpu_dm_set_crc_window_default(dm_crtc_state);
+
                if (!amdgpu_dm_crc_window_is_default(dm_crtc_state)) {
                        crc_window = &tmp_window;
 
index 6f975c1..8ab0b90 100644 (file)
@@ -24,6 +24,7 @@
  */
 
 #include <linux/version.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_mst_helper.h>
 #include <drm/drm_dp_helper.h>
@@ -252,8 +253,10 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
 
 static struct drm_encoder *
 dm_mst_atomic_best_encoder(struct drm_connector *connector,
-                          struct drm_connector_state *connector_state)
+                          struct drm_atomic_state *state)
 {
+       struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state,
+                                                                                        connector);
        struct drm_device *dev = connector->dev;
        struct amdgpu_device *adev = drm_to_adev(dev);
        struct amdgpu_crtc *acrtc = to_amdgpu_crtc(connector_state->crtc);
index 6f4fe8f..d00b025 100644 (file)
@@ -746,24 +746,24 @@ static struct wm_table ddr4_wm_table_rn = {
                        .wm_inst = WM_B,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 10.12,
-                       .sr_enter_plus_exit_time_us = 11.48,
+                       .sr_exit_time_us = 11.12,
+                       .sr_enter_plus_exit_time_us = 12.48,
                        .valid = true,
                },
                {
                        .wm_inst = WM_C,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 10.12,
-                       .sr_enter_plus_exit_time_us = 11.48,
+                       .sr_exit_time_us = 11.12,
+                       .sr_enter_plus_exit_time_us = 12.48,
                        .valid = true,
                },
                {
                        .wm_inst = WM_D,
                        .wm_type = WM_TYPE_PSTATE_CHG,
                        .pstate_latency_us = 11.72,
-                       .sr_exit_time_us = 10.12,
-                       .sr_enter_plus_exit_time_us = 11.48,
+                       .sr_exit_time_us = 11.12,
+                       .sr_enter_plus_exit_time_us = 12.48,
                        .valid = true,
                },
        }
index 58eb0d6..7339d98 100644 (file)
@@ -2625,6 +2625,26 @@ static void commit_planes_for_stream(struct dc *dc,
                }
        }
 
+       if (update_type != UPDATE_TYPE_FAST) {
+               // If changing VTG FP2: wait until back in vactive to program FP2
+               // Need to ensure that pipe unlock happens soon after to minimize race condition
+               for (i = 0; i < dc->res_pool->pipe_count; i++) {
+                       struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+                       if (pipe_ctx->top_pipe || pipe_ctx->stream != stream)
+                               continue;
+
+                       if (!pipe_ctx->update_flags.bits.global_sync)
+                               continue;
+
+                       pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK);
+                       pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
+
+                       pipe_ctx->stream_res.tg->funcs->set_vtg_params(
+                                       pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
+               }
+       }
+
        if ((update_type != UPDATE_TYPE_FAST) && dc->hwss.interdependent_update_lock)
                dc->hwss.interdependent_update_lock(dc, context, false);
        else
index a901baf..9e1071b 100644 (file)
@@ -3267,9 +3267,6 @@ void core_link_enable_stream(
                        }
                }
 
-#if defined(CONFIG_DRM_AMD_DC_DCN3_0)
-#endif
-
                /* turn off otg test pattern if enable */
                if (pipe_ctx->stream_res.tg->funcs->set_test_pattern)
                        pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg,
index b8f1e2d..3aedadb 100644 (file)
@@ -42,7 +42,7 @@
 #include "inc/hw/dmcu.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.2.115"
+#define DC_VER "3.2.116"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
index b409f6b..210466b 100644 (file)
@@ -119,7 +119,8 @@ static const struct link_encoder_funcs dce110_lnk_enc_funcs = {
        .disable_hpd = dce110_link_encoder_disable_hpd,
        .is_dig_enabled = dce110_is_dig_enabled,
        .destroy = dce110_link_encoder_destroy,
-       .get_max_link_cap = dce110_link_encoder_get_max_link_cap
+       .get_max_link_cap = dce110_link_encoder_get_max_link_cap,
+       .get_dig_frontend = dce110_get_dig_frontend,
 };
 
 static enum bp_result link_transmitter_control(
@@ -235,6 +236,44 @@ static void set_link_training_complete(
 
 }
 
+unsigned int dce110_get_dig_frontend(struct link_encoder *enc)
+{
+       struct dce110_link_encoder *enc110 = TO_DCE110_LINK_ENC(enc);
+       u32 value;
+       enum engine_id result;
+
+       REG_GET(DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, &value);
+
+       switch (value) {
+       case DCE110_DIG_FE_SOURCE_SELECT_DIGA:
+               result = ENGINE_ID_DIGA;
+               break;
+       case DCE110_DIG_FE_SOURCE_SELECT_DIGB:
+               result = ENGINE_ID_DIGB;
+               break;
+       case DCE110_DIG_FE_SOURCE_SELECT_DIGC:
+               result = ENGINE_ID_DIGC;
+               break;
+       case DCE110_DIG_FE_SOURCE_SELECT_DIGD:
+               result = ENGINE_ID_DIGD;
+               break;
+       case DCE110_DIG_FE_SOURCE_SELECT_DIGE:
+               result = ENGINE_ID_DIGE;
+               break;
+       case DCE110_DIG_FE_SOURCE_SELECT_DIGF:
+               result = ENGINE_ID_DIGF;
+               break;
+       case DCE110_DIG_FE_SOURCE_SELECT_DIGG:
+               result = ENGINE_ID_DIGG;
+               break;
+       default:
+               // invalid source select DIG
+               result = ENGINE_ID_UNKNOWN;
+       }
+
+       return result;
+}
+
 void dce110_link_encoder_set_dp_phy_pattern_training_pattern(
        struct link_encoder *enc,
        uint32_t index)
@@ -1665,7 +1704,8 @@ static const struct link_encoder_funcs dce60_lnk_enc_funcs = {
        .disable_hpd = dce110_link_encoder_disable_hpd,
        .is_dig_enabled = dce110_is_dig_enabled,
        .destroy = dce110_link_encoder_destroy,
-       .get_max_link_cap = dce110_link_encoder_get_max_link_cap
+       .get_max_link_cap = dce110_link_encoder_get_max_link_cap,
+       .get_dig_frontend = dce110_get_dig_frontend
 };
 
 void dce60_link_encoder_construct(
index cb714a4..fc6ade8 100644 (file)
@@ -295,6 +295,8 @@ void dce110_link_encoder_connect_dig_be_to_fe(
        enum engine_id engine,
        bool connect);
 
+unsigned int dce110_get_dig_frontend(struct link_encoder *enc);
+
 void dce110_link_encoder_set_dp_phy_pattern_training_pattern(
        struct link_encoder *enc,
        uint32_t index);
index 82bc4e1..915fbb8 100644 (file)
@@ -1268,7 +1268,7 @@ void dce120_timing_generator_construct(
        tg110->min_h_front_porch = 0;
        tg110->min_h_back_porch = 0;
 
-       tg110->min_h_sync_width = 8;
+       tg110->min_h_sync_width = 4;
        tg110->min_v_sync_width = 1;
        tg110->min_v_blank = 3;
 }
index 75637c2..6f42d10 100644 (file)
@@ -124,11 +124,11 @@ bool hubbub1_verify_allow_pstate_change_high(
         * still not asserted, we are probably stuck and going to hang
         *
         * TODO: Figure out why it takes ~100us on linux
-        * pstate takes around ~100us on linux. Unknown currently as to
-        * why it takes that long on linux
+        * pstate takes ~100us (up to 200us) on linux; it is currently
+        * unknown why it takes that long
         */
        const unsigned int pstate_wait_timeout_us = 200;
-       const unsigned int pstate_wait_expected_timeout_us = 40;
+       const unsigned int pstate_wait_expected_timeout_us = 180;
        static unsigned int max_sampled_pstate_wait_us; /* data collection */
        static bool forced_pstate_allow; /* help with revert wa */
 
index 9f7d6b0..cfc130e 100644 (file)
@@ -2736,7 +2736,7 @@ static void dcn10_program_all_pipe_in_tree(
                                pipe_ctx->pipe_dlg_param.vupdate_width);
 
                pipe_ctx->stream_res.tg->funcs->set_vtg_params(
-                               pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
+                               pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, true);
 
                if (hws->funcs.setup_vupdate_interrupt)
                        hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
index a125d3f..f033397 100644 (file)
@@ -272,7 +272,7 @@ void optc1_program_timing(
                        vupdate_offset,
                        vupdate_width);
 
-       optc->funcs->set_vtg_params(optc, dc_crtc_timing);
+       optc->funcs->set_vtg_params(optc, dc_crtc_timing, true);
 
        /* TODO
         * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1
@@ -312,7 +312,7 @@ void optc1_program_timing(
 }
 
 void optc1_set_vtg_params(struct timing_generator *optc,
-               const struct dc_crtc_timing *dc_crtc_timing)
+               const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2)
 {
        struct dc_crtc_timing patched_crtc_timing;
        uint32_t asic_blank_end;
@@ -348,9 +348,12 @@ void optc1_set_vtg_params(struct timing_generator *optc,
                }
        }
 
-       REG_UPDATE_2(CONTROL,
-                       VTG0_FP2, v_fp2,
-                       VTG0_VCOUNT_INIT, v_init);
+       if (program_fp2)
+               REG_UPDATE_2(CONTROL,
+                               VTG0_FP2, v_fp2,
+                               VTG0_VCOUNT_INIT, v_init);
+       else
+               REG_UPDATE(CONTROL, VTG0_VCOUNT_INIT, v_init);
 }
 
 void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable)
@@ -1540,7 +1543,7 @@ void dcn10_timing_generator_init(struct optc *optc1)
        optc1->min_h_blank = 32;
        optc1->min_v_blank = 3;
        optc1->min_v_blank_interlace = 5;
-       optc1->min_h_sync_width = 8;
+       optc1->min_h_sync_width = 4;
        optc1->min_v_sync_width = 1;
 }
 
index 344eb48..b12bd9a 100644 (file)
@@ -700,6 +700,6 @@ bool optc1_get_crc(struct timing_generator *optc,
 bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing);
 
 void optc1_set_vtg_params(struct timing_generator *optc,
-               const struct dc_crtc_timing *dc_crtc_timing);
+               const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2);
 
 #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */
index 9e38c37..76b3346 100644 (file)
@@ -81,7 +81,9 @@
        SRI(DP_MSE_RATE_UPDATE, DP, id), \
        SRI(DP_PIXEL_FORMAT, DP, id), \
        SRI(DP_SEC_CNTL, DP, id), \
+       SRI(DP_SEC_CNTL1, DP, id), \
        SRI(DP_SEC_CNTL2, DP, id), \
+       SRI(DP_SEC_CNTL5, DP, id), \
        SRI(DP_SEC_CNTL6, DP, id), \
        SRI(DP_STEER_FIFO, DP, id), \
        SRI(DP_VID_M, DP, id), \
@@ -126,7 +128,9 @@ struct dcn10_stream_enc_registers {
        uint32_t DP_MSE_RATE_UPDATE;
        uint32_t DP_PIXEL_FORMAT;
        uint32_t DP_SEC_CNTL;
+       uint32_t DP_SEC_CNTL1;
        uint32_t DP_SEC_CNTL2;
+       uint32_t DP_SEC_CNTL5;
        uint32_t DP_SEC_CNTL6;
        uint32_t DP_STEER_FIFO;
        uint32_t DP_VID_M;
@@ -411,6 +415,8 @@ struct dcn10_stream_enc_registers {
        type DP_SEC_GSP3_ENABLE;\
        type DP_SEC_GSP4_ENABLE;\
        type DP_SEC_GSP5_ENABLE;\
+       type DP_SEC_GSP5_LINE_NUM;\
+       type DP_SEC_GSP5_LINE_REFERENCE;\
        type DP_SEC_GSP6_ENABLE;\
        type DP_SEC_GSP7_ENABLE;\
        type DP_SEC_GSP7_PPS;\
index abcb060..31a4771 100644 (file)
@@ -1595,7 +1595,7 @@ static void dcn20_program_pipe(
                                pipe_ctx->pipe_dlg_param.vupdate_width);
 
                pipe_ctx->stream_res.tg->funcs->set_vtg_params(
-                               pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
+                               pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false);
 
                if (hws->funcs.setup_vupdate_interrupt)
                        hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx);
@@ -1695,14 +1695,6 @@ void dcn20_program_front_end_for_ctx(
                                && context->res_ctx.pipe_ctx[i].stream)
                        hws->funcs.blank_pixel_data(dc, &context->res_ctx.pipe_ctx[i], true);
 
-       /* wait for outstanding pending changes before adding or removing planes */
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (context->res_ctx.pipe_ctx[i].update_flags.bits.disable ||
-                               context->res_ctx.pipe_ctx[i].update_flags.bits.enable) {
-                       dc->hwss.wait_for_pending_cleared(dc, context);
-                       break;
-               }
-       }
 
        /* Disconnect mpcc */
        for (i = 0; i < dc->res_pool->pipe_count; i++)
@@ -1856,7 +1848,7 @@ bool dcn20_update_bandwidth(
                                        pipe_ctx->pipe_dlg_param.vupdate_width);
 
                        pipe_ctx->stream_res.tg->funcs->set_vtg_params(
-                                       pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
+                                       pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, false);
 
                        if (pipe_ctx->prev_odm_pipe == NULL)
                                hws->funcs.blank_pixel_data(dc, pipe_ctx, blank);
@@ -2251,11 +2243,11 @@ void dcn20_get_mpctree_visual_confirm_color(
 {
        const struct tg_color pipe_colors[6] = {
                        {MAX_TG_COLOR_VALUE, 0, 0}, // red
-                       {MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE}, // yellow
-                       {0, MAX_TG_COLOR_VALUE, 0}, // blue
+                       {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE / 4, 0}, // orange
+                       {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, 0}, // yellow
+                       {0, MAX_TG_COLOR_VALUE, 0}, // green
+                       {0, 0, MAX_TG_COLOR_VALUE}, // blue
                        {MAX_TG_COLOR_VALUE / 2, 0, MAX_TG_COLOR_VALUE / 2}, // purple
-                       {0, 0, MAX_TG_COLOR_VALUE}, // green
-                       {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE * 2 / 3, 0}, // orange
        };
 
        struct pipe_ctx *top_pipe = pipe_ctx;
@@ -2280,14 +2272,11 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx)
 
        // input to MPCC is always RGB, by default leave black_color at 0
        if (dc->debug.visual_confirm == VISUAL_CONFIRM_HDR) {
-               hws->funcs.get_hdr_visual_confirm_color(
-                               pipe_ctx, &blnd_cfg.black_color);
+               hws->funcs.get_hdr_visual_confirm_color(pipe_ctx, &blnd_cfg.black_color);
        } else if (dc->debug.visual_confirm == VISUAL_CONFIRM_SURFACE) {
-               hws->funcs.get_surface_visual_confirm_color(
-                               pipe_ctx, &blnd_cfg.black_color);
+               hws->funcs.get_surface_visual_confirm_color(pipe_ctx, &blnd_cfg.black_color);
        } else if (dc->debug.visual_confirm == VISUAL_CONFIRM_MPCTREE) {
-               dcn20_get_mpctree_visual_confirm_color(
-                               pipe_ctx, &blnd_cfg.black_color);
+               dcn20_get_mpctree_visual_confirm_color(pipe_ctx, &blnd_cfg.black_color);
        }
 
        if (per_pixel_alpha)
index d2a805b..9a881e6 100644 (file)
@@ -83,6 +83,8 @@
        SE_SF(DIG0_HDMI_METADATA_PACKET_CONTROL, HDMI_METADATA_PACKET_LINE, mask_sh),\
        SE_SF(DIG0_DIG_FE_CNTL, DOLBY_VISION_EN, mask_sh),\
        SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_COMBINE, mask_sh),\
+       SE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP5_LINE_REFERENCE, mask_sh),\
+       SE_SF(DP0_DP_SEC_CNTL5, DP_SEC_GSP5_LINE_NUM, mask_sh),\
        SE_SF(DP0_DP_SEC_FRAMING4, DP_SST_SDP_SPLITTING, mask_sh)
 
 void dcn20_stream_encoder_construct(
index b7efa77..e44a374 100644 (file)
@@ -32,5 +32,6 @@ struct dccg *dccg21_create(
        const struct dccg_shift *dccg_shift,
        const struct dccg_mask *dccg_mask);
 
+void dccg21_update_dpp_dto(struct dccg *dccg, int dpp_inst, int req_dppclk);
 
 #endif /* __DCN21_DCCG_H__ */
index 2ae159e..46ea39f 100644 (file)
@@ -51,7 +51,7 @@
        (enc10->link_regs->index)
 
 
-static bool dcn30_link_encoder_validate_output_with_stream(
+bool dcn30_link_encoder_validate_output_with_stream(
        struct link_encoder *enc,
        const struct dc_stream_state *stream)
 {
index 2fbf879..f2d90f2 100644 (file)
@@ -78,4 +78,8 @@ void dcn30_link_encoder_construct(
 
 void enc3_hw_init(struct link_encoder *enc);
 
+bool dcn30_link_encoder_validate_output_with_stream(
+       struct link_encoder *enc,
+       const struct dc_stream_state *stream);
+
 #endif /* __DC_LINK_ENCODER__DCN30_H__ */
index 283995a..3deb3fb 100644 (file)
@@ -668,7 +668,7 @@ void dcn30_update_info_frame(struct pipe_ctx *pipe_ctx)
        is_hdmi_tmds = dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal);
        is_dp = dc_is_dp_signal(pipe_ctx->stream->signal);
 
-       if (!is_hdmi_tmds)
+       if (!is_hdmi_tmds && !is_dp)
                return;
 
        if (is_hdmi_tmds)
index b1f228f..3ba3991 100644 (file)
@@ -350,7 +350,7 @@ void dcn30_timing_generator_init(struct optc *optc1)
        optc1->min_h_blank = 32;
        optc1->min_v_blank = 3;
        optc1->min_v_blank_interlace = 5;
-       optc1->min_h_sync_width = 8;
+       optc1->min_h_sync_width = 4;
        optc1->min_v_sync_width = 1;
 }
 
index 12d5718..f7632fe 100644 (file)
@@ -271,7 +271,7 @@ struct timing_generator_funcs {
                        struct dc_crtc_timing *hw_crtc_timing);
 
        void (*set_vtg_params)(struct timing_generator *optc,
-                       const struct dc_crtc_timing *dc_crtc_timing);
+                       const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2);
 
        void (*set_dsc_config)(struct timing_generator *optc,
                               enum optc_dsc_mode dsc_mode,
index b20a39f..f512bda 100644 (file)
 
 /* Firmware versioning. */
 #ifdef DMUB_EXPOSE_VERSION
-#define DMUB_FW_VERSION_GIT_HASH 0x931573111
+#define DMUB_FW_VERSION_GIT_HASH 0xa18e25995
 #define DMUB_FW_VERSION_MAJOR 0
 #define DMUB_FW_VERSION_MINOR 0
-#define DMUB_FW_VERSION_REVISION 45
+#define DMUB_FW_VERSION_REVISION 46
 #define DMUB_FW_VERSION_TEST 0
 #define DMUB_FW_VERSION_VBIOS 0
 #define DMUB_FW_VERSION_HOTFIX 0
@@ -514,12 +514,20 @@ enum dp_aux_request_action {
 
 enum aux_return_code_type {
        AUX_RET_SUCCESS = 0,
+       AUX_RET_ERROR_UNKNOWN,
+       AUX_RET_ERROR_INVALID_REPLY,
        AUX_RET_ERROR_TIMEOUT,
-       AUX_RET_ERROR_NO_DATA,
+       AUX_RET_ERROR_HPD_DISCON,
+       AUX_RET_ERROR_ENGINE_ACQUIRE,
        AUX_RET_ERROR_INVALID_OPERATION,
        AUX_RET_ERROR_PROTOCOL_ERROR,
 };
 
+enum aux_channel_type {
+       AUX_CHANNEL_LEGACY_DDC,
+       AUX_CHANNEL_DPIA
+};
+
 /* DP AUX command */
 struct aux_transaction_parameters {
        uint8_t is_i2c_over_aux;
@@ -532,9 +540,10 @@ struct aux_transaction_parameters {
 
 struct dmub_cmd_dp_aux_control_data {
        uint32_t handle;
-       uint8_t port_index;
+       uint8_t instance;
        uint8_t sw_crc_enabled;
        uint16_t timeout;
+       enum aux_channel_type type;
        struct aux_transaction_parameters dpaux;
 };
 
@@ -558,7 +567,7 @@ struct aux_reply_data {
 
 struct aux_reply_control_data {
        uint32_t handle;
-       uint8_t phy_port_index;
+       uint8_t instance;
        uint8_t result;
        uint16_t pad;
 };
@@ -581,7 +590,7 @@ enum dp_hpd_status {
 };
 
 struct dp_hpd_data {
-       uint8_t phy_port_index;
+       uint8_t instance;
        uint8_t hpd_type;
        uint8_t hpd_status;
        uint8_t pad;
@@ -732,27 +741,30 @@ enum dmub_cmd_abm_type {
 struct abm_config_table {
        /* Parameters for crgb conversion */
        uint16_t crgb_thresh[NUM_POWER_FN_SEGS];                 // 0B
-       uint16_t crgb_offset[NUM_POWER_FN_SEGS];                 // 15B
-       uint16_t crgb_slope[NUM_POWER_FN_SEGS];                  // 31B
+       uint16_t crgb_offset[NUM_POWER_FN_SEGS];                 // 16B
+       uint16_t crgb_slope[NUM_POWER_FN_SEGS];                  // 32B
 
        /* Parameters for custom curve */
-       uint16_t backlight_thresholds[NUM_BL_CURVE_SEGS];        // 47B
-       uint16_t backlight_offsets[NUM_BL_CURVE_SEGS];           // 79B
-
-       uint16_t ambient_thresholds_lux[NUM_AMBI_LEVEL];         // 111B
-       uint16_t min_abm_backlight;                              // 121B
-
-       uint8_t min_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL];   // 123B
-       uint8_t max_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL];   // 143B
-       uint8_t bright_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; // 163B
-       uint8_t dark_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL];   // 183B
-       uint8_t hybrid_factor[NUM_AGGR_LEVEL];                   // 203B
-       uint8_t contrast_factor[NUM_AGGR_LEVEL];                 // 207B
-       uint8_t deviation_gain[NUM_AGGR_LEVEL];                  // 211B
-       uint8_t min_knee[NUM_AGGR_LEVEL];                        // 215B
-       uint8_t max_knee[NUM_AGGR_LEVEL];                        // 219B
-       uint8_t iir_curve[NUM_AMBI_LEVEL];                       // 223B
-       uint8_t pad3[3];                                         // 228B
+       uint16_t backlight_thresholds[NUM_BL_CURVE_SEGS];        // 48B
+       uint16_t backlight_offsets[NUM_BL_CURVE_SEGS];           // 80B
+
+       uint16_t ambient_thresholds_lux[NUM_AMBI_LEVEL];         // 112B
+       uint16_t min_abm_backlight;                              // 122B
+
+       uint8_t min_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL];   // 124B
+       uint8_t max_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL];   // 144B
+       uint8_t bright_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; // 164B
+       uint8_t dark_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL];   // 184B
+       uint8_t hybrid_factor[NUM_AGGR_LEVEL];                   // 204B
+       uint8_t contrast_factor[NUM_AGGR_LEVEL];                 // 208B
+       uint8_t deviation_gain[NUM_AGGR_LEVEL];                  // 212B
+       uint8_t min_knee[NUM_AGGR_LEVEL];                        // 216B
+       uint8_t max_knee[NUM_AGGR_LEVEL];                        // 220B
+       uint8_t iir_curve[NUM_AMBI_LEVEL];                       // 224B
+       uint8_t pad3[3];                                         // 229B
+
+       uint16_t blRampReduction[NUM_AGGR_LEVEL];                // 232B
+       uint16_t blRampStart[NUM_AGGR_LEVEL];                    // 240B
 };
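
Note: the trailing byte-offset comments can be machine-checked; a standalone
sketch, assuming NUM_POWER_FN_SEGS == 8, NUM_BL_CURVE_SEGS == 16,
NUM_AMBI_LEVEL == 5 and NUM_AGGR_LEVEL == 4:

#include <stddef.h>

_Static_assert(offsetof(struct abm_config_table, crgb_offset) == 16,
	       "crgb_offset comment");
_Static_assert(offsetof(struct abm_config_table, backlight_offsets) == 80,
	       "backlight_offsets comment");
_Static_assert(offsetof(struct abm_config_table, blRampStart) == 240,
	       "blRampStart comment");
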
 
 struct dmub_cmd_abm_set_pipe_data {
index eced40a..5c67e12 100644 (file)
 #include "opp.h"
 #include "color_gamma.h"
 
+/* When calculating LUT values the first region and at least one subsequent
+ * region are calculated with full precision. These defines are a demarcation
+ * of where the second region starts and ends.
+ * These are hardcoded values to avoid recalculating them in loops.
+ */
+#define PRECISE_LUT_REGION_START 224
+#define PRECISE_LUT_REGION_END 239
+
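
Note: the gate added further down in translate_from_linear_space() is
equivalent to this predicate (sketch): indices 0..15 and the START..END
window take the exact dc_fixpt_pow() path, everything else keeps the
cheaper approximation.

static bool wants_full_precision(unsigned int index)
{
	return index < 16 ||
	       (index >= PRECISE_LUT_REGION_START &&
		index <= PRECISE_LUT_REGION_END);
}
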
 static struct hw_x_point coordinates_x[MAX_HW_POINTS + 2];
 
 // these are helpers for calculations to reduce stack usage
@@ -346,7 +354,13 @@ static struct fixed31_32 translate_from_linear_space(
                                        dc_fixpt_recip(args->gamma));
                }
                scratch_1 = dc_fixpt_add(one, args->a3);
-               if (cal_buffer->buffer_index < 16)
+               /* In the first region (first 16 points) and in the
+                * region delimited by START/END we calculate with
+                * full precision to avoid error accumulation.
+                */
+               if ((cal_buffer->buffer_index >= PRECISE_LUT_REGION_START &&
+                       cal_buffer->buffer_index <= PRECISE_LUT_REGION_END) ||
+                       (cal_buffer->buffer_index < 16))
                        scratch_2 = dc_fixpt_pow(args->arg,
                                        dc_fixpt_recip(args->gamma));
                else
@@ -397,9 +411,7 @@ static struct fixed31_32 translate_from_linear_space_long(
                                        dc_fixpt_recip(args->gamma))),
                                        args->a2);
        else
-               return dc_fixpt_mul(
-                       args->arg,
-                       args->a1);
+               return dc_fixpt_mul(args->arg, args->a1);
 }
 
 static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg, bool use_eetf, struct calculate_buffer *cal_buffer)
@@ -717,7 +729,6 @@ static struct fixed31_32 calculate_mapped_value(
                BREAK_TO_DEBUGGER();
                result = dc_fixpt_zero;
        } else {
-               BREAK_TO_DEBUGGER();
                result = dc_fixpt_one;
        }
 
@@ -976,6 +987,7 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
                cal_buffer->buffer_index = 0; // see var definition for more info
        rgb += 32; // first 32 points have problems with fixed point, too small
        coord_x += 32;
+
        for (i = 32; i <= hw_points_num; i++) {
                if (!is_clipped) {
                        if (use_eetf) {
index c386359..3cb8d4c 100644 (file)
@@ -499,6 +499,7 @@ enum atombios_firmware_capability
        ATOM_FIRMWARE_CAP_HWEMU_UMC_CFG = 0x00000100,
        ATOM_FIRMWARE_CAP_SRAM_ECC      = 0x00000200,
        ATOM_FIRMWARE_CAP_ENABLE_2STAGE_BIST_TRAINING  = 0x00000400,
+       ATOM_FIRMWARE_CAP_ENABLE_2ND_USB20PORT = 0x0008000,
 };
 
 enum atom_cooling_solution_id{
index 89be49a..4bdbcce 100644 (file)
@@ -227,6 +227,7 @@ struct smu_bios_boot_up_values
        uint32_t                        content_revision;
        uint32_t                        fclk;
        uint32_t                        lclk;
+       uint32_t                        firmware_caps;
 };
 
 enum smu_table_id
index 4a6d138..720d156 100644 (file)
        __SMU_DUMMY_MAP(SET_DRIVER_DUMMY_TABLE_DRAM_ADDR_LOW), \
        __SMU_DUMMY_MAP(GET_UMC_FW_WA), \
        __SMU_DUMMY_MAP(Mode1Reset), \
-       __SMU_DUMMY_MAP(Spare),                          \
+       __SMU_DUMMY_MAP(RlcPowerNotify),                 \
        __SMU_DUMMY_MAP(SetHardMinIspiclkByFreq),        \
        __SMU_DUMMY_MAP(SetHardMinIspxclkByFreq),        \
        __SMU_DUMMY_MAP(SetSoftMinSocclkByFreq),         \
        __SMU_DUMMY_MAP(SetSoftMinCclk),                     \
        __SMU_DUMMY_MAP(SetSoftMaxCclk),                     \
        __SMU_DUMMY_MAP(SetGpoFeaturePMask),             \
+       __SMU_DUMMY_MAP(DisallowGpo),                    \
+       __SMU_DUMMY_MAP(Enable2ndUSB20Port),             \
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
index 35dd607..d2e10a7 100644 (file)
 #define PPSMC_MSG_SetGpoFeaturePMask             0x45
 #define PPSMC_MSG_SetSMBUSInterrupt              0x46
 
-#define PPSMC_Message_Count                      0x47
+#define PPSMC_MSG_DisallowGpo                    0x56
+
+#define PPSMC_MSG_Enable2ndUSB20Port             0x57
+
+#define PPSMC_Message_Count                      0x58
 
 #endif
index 7e69b3b..55d7892 100644 (file)
@@ -41,7 +41,7 @@
 #define PPSMC_MSG_PowerUpIspByTile                     0x7
 #define PPSMC_MSG_PowerDownVcn                         0x8 // VCN is power gated by default
 #define PPSMC_MSG_PowerUpVcn                           0x9
-#define PPSMC_MSG_spare                                0xA
+#define PPSMC_MSG_RlcPowerNotify                       0xA
 #define PPSMC_MSG_SetHardMinVcn                        0xB // For wireless display
 #define PPSMC_MSG_SetSoftMinGfxclk                     0xC //Sets SoftMin for GFXCLK. Arg is in MHz
 #define PPSMC_MSG_ActiveProcessNotify                  0xD
index cf999b7..8b867a6 100644 (file)
@@ -847,12 +847,10 @@ static int smu_sw_init(void *handle)
        smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
        smu->smu_dpm.requested_dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
 
-       if (!amdgpu_sriov_vf(adev) || (adev->asic_type != CHIP_NAVI12)) {
-               ret = smu_init_microcode(smu);
-               if (ret) {
-                       dev_err(adev->dev, "Failed to load smu firmware!\n");
-                       return ret;
-               }
+       ret = smu_init_microcode(smu);
+       if (ret) {
+               dev_err(adev->dev, "Failed to load smu firmware!\n");
+               return ret;
        }
 
        ret = smu_smc_table_sw_init(smu);
index 3f20f77..9608745 100644 (file)
@@ -128,6 +128,8 @@ static struct cmn2asic_msg_mapping sienna_cichlid_message_map[SMU_MSG_MAX_COUNT]
        MSG_MAP(Mode1Reset,                     PPSMC_MSG_Mode1Reset,                  0),
        MSG_MAP(SetMGpuFanBoostLimitRpm,        PPSMC_MSG_SetMGpuFanBoostLimitRpm,     0),
        MSG_MAP(SetGpoFeaturePMask,             PPSMC_MSG_SetGpoFeaturePMask,          0),
+       MSG_MAP(DisallowGpo,                    PPSMC_MSG_DisallowGpo,                 0),
+       MSG_MAP(Enable2ndUSB20Port,             PPSMC_MSG_Enable2ndUSB20Port,          0),
 };
 
 static struct cmn2asic_mapping sienna_cichlid_clk_map[SMU_CLK_COUNT] = {
@@ -302,6 +304,9 @@ static int sienna_cichlid_check_powerplay_table(struct smu_context *smu)
                table_context->power_play_table;
        struct smu_baco_context *smu_baco = &smu->smu_baco;
 
+       if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_HARDWAREDC)
+               smu->dc_controlled_by_gpio = true;
+
        if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO ||
            powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_MACO)
                smu_baco->platform_support = true;
@@ -377,7 +382,7 @@ static int sienna_cichlid_tables_init(struct smu_context *smu)
                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
        SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t),
                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
-       SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t),
+       SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetricsExternal_t),
                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
        SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t),
                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
@@ -386,10 +391,10 @@ static int sienna_cichlid_tables_init(struct smu_context *smu)
        SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU11_TOOL_SIZE,
                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
        SMU_TABLE_INIT(tables, SMU_TABLE_ACTIVITY_MONITOR_COEFF,
-                      sizeof(DpmActivityMonitorCoeffInt_t), PAGE_SIZE,
+                      sizeof(DpmActivityMonitorCoeffIntExternal_t), PAGE_SIZE,
                       AMDGPU_GEM_DOMAIN_VRAM);
 
-       smu_table->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);
+       smu_table->metrics_table = kzalloc(sizeof(SmuMetricsExternal_t), GFP_KERNEL);
        if (!smu_table->metrics_table)
                goto err0_out;
        smu_table->metrics_time = 0;
@@ -418,7 +423,8 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
                                               uint32_t *value)
 {
        struct smu_table_context *smu_table= &smu->smu_table;
-       SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table;
+       SmuMetrics_t *metrics =
+               &(((SmuMetricsExternal_t *)(smu_table->metrics_table))->SmuMetrics);
        int ret = 0;
 
        mutex_lock(&smu->metrics_lock);
@@ -1065,12 +1071,18 @@ static int sienna_cichlid_populate_umd_state_clk(struct smu_context *smu)
 
        pstate_table->gfxclk_pstate.min = gfx_table->min;
        pstate_table->gfxclk_pstate.peak = gfx_table->max;
+       if (gfx_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK)
+               pstate_table->gfxclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK;
 
        pstate_table->uclk_pstate.min = mem_table->min;
        pstate_table->uclk_pstate.peak = mem_table->max;
+       if (mem_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK)
+               pstate_table->uclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK;
 
        pstate_table->socclk_pstate.min = soc_table->min;
        pstate_table->socclk_pstate.peak = soc_table->max;
+       if (soc_table->max >= SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK)
+               pstate_table->socclk_pstate.standard = SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK;
 
        return 0;
 }
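
Note: the External-type switches in this file (SmuMetricsExternal_t,
DpmActivityMonitorCoeffIntExternal_t), as the activity-monitor hunks below
also show, follow one pattern: the firmware interface wraps the core struct
in an outer type, and the driver allocates and transfers the outer type but
works on the embedded member. Roughly (the padding member is an assumption):

typedef struct {
	SmuMetrics_t SmuMetrics;	/* core telemetry, as before */
	uint32_t MmHubPadding[8];	/* SMU-internal padding (assumed) */
} SmuMetricsExternal_t;
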
@@ -1156,7 +1168,9 @@ static int sienna_cichlid_get_fan_parameters(struct smu_context *smu)
 
 static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char *buf)
 {
-       DpmActivityMonitorCoeffInt_t activity_monitor;
+       DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
+       DpmActivityMonitorCoeffInt_t *activity_monitor =
+               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
        uint32_t i, size = 0;
        int16_t workload_type = 0;
        static const char *profile_name[] = {
@@ -1198,7 +1212,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char *
 
                result = smu_cmn_update_table(smu,
                                          SMU_TABLE_ACTIVITY_MONITOR_COEFF, workload_type,
-                                         (void *)(&activity_monitor), false);
+                                         (void *)(&activity_monitor_external), false);
                if (result) {
                        dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
                        return result;
@@ -1211,43 +1225,43 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char *
                        " ",
                        0,
                        "GFXCLK",
-                       activity_monitor.Gfx_FPS,
-                       activity_monitor.Gfx_MinFreqStep,
-                       activity_monitor.Gfx_MinActiveFreqType,
-                       activity_monitor.Gfx_MinActiveFreq,
-                       activity_monitor.Gfx_BoosterFreqType,
-                       activity_monitor.Gfx_BoosterFreq,
-                       activity_monitor.Gfx_PD_Data_limit_c,
-                       activity_monitor.Gfx_PD_Data_error_coeff,
-                       activity_monitor.Gfx_PD_Data_error_rate_coeff);
+                       activity_monitor->Gfx_FPS,
+                       activity_monitor->Gfx_MinFreqStep,
+                       activity_monitor->Gfx_MinActiveFreqType,
+                       activity_monitor->Gfx_MinActiveFreq,
+                       activity_monitor->Gfx_BoosterFreqType,
+                       activity_monitor->Gfx_BoosterFreq,
+                       activity_monitor->Gfx_PD_Data_limit_c,
+                       activity_monitor->Gfx_PD_Data_error_coeff,
+                       activity_monitor->Gfx_PD_Data_error_rate_coeff);
 
                size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n",
                        " ",
                        1,
                        "SOCCLK",
-                       activity_monitor.Fclk_FPS,
-                       activity_monitor.Fclk_MinFreqStep,
-                       activity_monitor.Fclk_MinActiveFreqType,
-                       activity_monitor.Fclk_MinActiveFreq,
-                       activity_monitor.Fclk_BoosterFreqType,
-                       activity_monitor.Fclk_BoosterFreq,
-                       activity_monitor.Fclk_PD_Data_limit_c,
-                       activity_monitor.Fclk_PD_Data_error_coeff,
-                       activity_monitor.Fclk_PD_Data_error_rate_coeff);
+                       activity_monitor->Fclk_FPS,
+                       activity_monitor->Fclk_MinFreqStep,
+                       activity_monitor->Fclk_MinActiveFreqType,
+                       activity_monitor->Fclk_MinActiveFreq,
+                       activity_monitor->Fclk_BoosterFreqType,
+                       activity_monitor->Fclk_BoosterFreq,
+                       activity_monitor->Fclk_PD_Data_limit_c,
+                       activity_monitor->Fclk_PD_Data_error_coeff,
+                       activity_monitor->Fclk_PD_Data_error_rate_coeff);
 
                size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n",
                        " ",
                        2,
                        "MEMLK",
-                       activity_monitor.Mem_FPS,
-                       activity_monitor.Mem_MinFreqStep,
-                       activity_monitor.Mem_MinActiveFreqType,
-                       activity_monitor.Mem_MinActiveFreq,
-                       activity_monitor.Mem_BoosterFreqType,
-                       activity_monitor.Mem_BoosterFreq,
-                       activity_monitor.Mem_PD_Data_limit_c,
-                       activity_monitor.Mem_PD_Data_error_coeff,
-                       activity_monitor.Mem_PD_Data_error_rate_coeff);
+                       activity_monitor->Mem_FPS,
+                       activity_monitor->Mem_MinFreqStep,
+                       activity_monitor->Mem_MinActiveFreqType,
+                       activity_monitor->Mem_MinActiveFreq,
+                       activity_monitor->Mem_BoosterFreqType,
+                       activity_monitor->Mem_BoosterFreq,
+                       activity_monitor->Mem_PD_Data_limit_c,
+                       activity_monitor->Mem_PD_Data_error_coeff,
+                       activity_monitor->Mem_PD_Data_error_rate_coeff);
        }
 
        return size;
@@ -1255,7 +1269,10 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char *
 
 static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
 {
-       DpmActivityMonitorCoeffInt_t activity_monitor;
+
+       DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
+       DpmActivityMonitorCoeffInt_t *activity_monitor =
+               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
        int workload_type, ret = 0;
 
        smu->power_profile_mode = input[size];
@@ -1269,7 +1286,7 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *
 
                ret = smu_cmn_update_table(smu,
                                       SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
-                                      (void *)(&activity_monitor), false);
+                                      (void *)(&activity_monitor_external), false);
                if (ret) {
                        dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
                        return ret;
@@ -1277,43 +1294,43 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *
 
                switch (input[0]) {
                case 0: /* Gfxclk */
-                       activity_monitor.Gfx_FPS = input[1];
-                       activity_monitor.Gfx_MinFreqStep = input[2];
-                       activity_monitor.Gfx_MinActiveFreqType = input[3];
-                       activity_monitor.Gfx_MinActiveFreq = input[4];
-                       activity_monitor.Gfx_BoosterFreqType = input[5];
-                       activity_monitor.Gfx_BoosterFreq = input[6];
-                       activity_monitor.Gfx_PD_Data_limit_c = input[7];
-                       activity_monitor.Gfx_PD_Data_error_coeff = input[8];
-                       activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9];
+                       activity_monitor->Gfx_FPS = input[1];
+                       activity_monitor->Gfx_MinFreqStep = input[2];
+                       activity_monitor->Gfx_MinActiveFreqType = input[3];
+                       activity_monitor->Gfx_MinActiveFreq = input[4];
+                       activity_monitor->Gfx_BoosterFreqType = input[5];
+                       activity_monitor->Gfx_BoosterFreq = input[6];
+                       activity_monitor->Gfx_PD_Data_limit_c = input[7];
+                       activity_monitor->Gfx_PD_Data_error_coeff = input[8];
+                       activity_monitor->Gfx_PD_Data_error_rate_coeff = input[9];
                        break;
                case 1: /* Socclk */
-                       activity_monitor.Fclk_FPS = input[1];
-                       activity_monitor.Fclk_MinFreqStep = input[2];
-                       activity_monitor.Fclk_MinActiveFreqType = input[3];
-                       activity_monitor.Fclk_MinActiveFreq = input[4];
-                       activity_monitor.Fclk_BoosterFreqType = input[5];
-                       activity_monitor.Fclk_BoosterFreq = input[6];
-                       activity_monitor.Fclk_PD_Data_limit_c = input[7];
-                       activity_monitor.Fclk_PD_Data_error_coeff = input[8];
-                       activity_monitor.Fclk_PD_Data_error_rate_coeff = input[9];
+                       activity_monitor->Fclk_FPS = input[1];
+                       activity_monitor->Fclk_MinFreqStep = input[2];
+                       activity_monitor->Fclk_MinActiveFreqType = input[3];
+                       activity_monitor->Fclk_MinActiveFreq = input[4];
+                       activity_monitor->Fclk_BoosterFreqType = input[5];
+                       activity_monitor->Fclk_BoosterFreq = input[6];
+                       activity_monitor->Fclk_PD_Data_limit_c = input[7];
+                       activity_monitor->Fclk_PD_Data_error_coeff = input[8];
+                       activity_monitor->Fclk_PD_Data_error_rate_coeff = input[9];
                        break;
                case 2: /* Memlk */
-                       activity_monitor.Mem_FPS = input[1];
-                       activity_monitor.Mem_MinFreqStep = input[2];
-                       activity_monitor.Mem_MinActiveFreqType = input[3];
-                       activity_monitor.Mem_MinActiveFreq = input[4];
-                       activity_monitor.Mem_BoosterFreqType = input[5];
-                       activity_monitor.Mem_BoosterFreq = input[6];
-                       activity_monitor.Mem_PD_Data_limit_c = input[7];
-                       activity_monitor.Mem_PD_Data_error_coeff = input[8];
-                       activity_monitor.Mem_PD_Data_error_rate_coeff = input[9];
+                       activity_monitor->Mem_FPS = input[1];
+                       activity_monitor->Mem_MinFreqStep = input[2];
+                       activity_monitor->Mem_MinActiveFreqType = input[3];
+                       activity_monitor->Mem_MinActiveFreq = input[4];
+                       activity_monitor->Mem_BoosterFreqType = input[5];
+                       activity_monitor->Mem_BoosterFreq = input[6];
+                       activity_monitor->Mem_PD_Data_limit_c = input[7];
+                       activity_monitor->Mem_PD_Data_error_coeff = input[8];
+                       activity_monitor->Mem_PD_Data_error_rate_coeff = input[9];
                        break;
                }
 
                ret = smu_cmn_update_table(smu,
                                       SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
-                                      (void *)(&activity_monitor), true);
+                                      (void *)(&activity_monitor_external), true);
                if (ret) {
                        dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
                        return ret;
@@ -2582,52 +2599,54 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
        struct smu_table_context *smu_table = &smu->smu_table;
        struct gpu_metrics_v1_0 *gpu_metrics =
                (struct gpu_metrics_v1_0 *)smu_table->gpu_metrics_table;
-       SmuMetrics_t metrics;
+       SmuMetricsExternal_t metrics_external;
+       SmuMetrics_t *metrics =
+               &(metrics_external.SmuMetrics);
        int ret = 0;
 
        ret = smu_cmn_get_metrics_table(smu,
-                                       &metrics,
+                                       &metrics_external,
                                        true);
        if (ret)
                return ret;
 
        smu_v11_0_init_gpu_metrics_v1_0(gpu_metrics);
 
-       gpu_metrics->temperature_edge = metrics.TemperatureEdge;
-       gpu_metrics->temperature_hotspot = metrics.TemperatureHotspot;
-       gpu_metrics->temperature_mem = metrics.TemperatureMem;
-       gpu_metrics->temperature_vrgfx = metrics.TemperatureVrGfx;
-       gpu_metrics->temperature_vrsoc = metrics.TemperatureVrSoc;
-       gpu_metrics->temperature_vrmem = metrics.TemperatureVrMem0;
+       gpu_metrics->temperature_edge = metrics->TemperatureEdge;
+       gpu_metrics->temperature_hotspot = metrics->TemperatureHotspot;
+       gpu_metrics->temperature_mem = metrics->TemperatureMem;
+       gpu_metrics->temperature_vrgfx = metrics->TemperatureVrGfx;
+       gpu_metrics->temperature_vrsoc = metrics->TemperatureVrSoc;
+       gpu_metrics->temperature_vrmem = metrics->TemperatureVrMem0;
 
-       gpu_metrics->average_gfx_activity = metrics.AverageGfxActivity;
-       gpu_metrics->average_umc_activity = metrics.AverageUclkActivity;
-       gpu_metrics->average_mm_activity = metrics.VcnActivityPercentage;
+       gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity;
+       gpu_metrics->average_umc_activity = metrics->AverageUclkActivity;
+       gpu_metrics->average_mm_activity = metrics->VcnActivityPercentage;
 
-       gpu_metrics->average_socket_power = metrics.AverageSocketPower;
-       gpu_metrics->energy_accumulator = metrics.EnergyAccumulator;
+       gpu_metrics->average_socket_power = metrics->AverageSocketPower;
+       gpu_metrics->energy_accumulator = metrics->EnergyAccumulator;
 
-       if (metrics.AverageGfxActivity <= SMU_11_0_7_GFX_BUSY_THRESHOLD)
-               gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequencyPostDs;
+       if (metrics->AverageGfxActivity <= SMU_11_0_7_GFX_BUSY_THRESHOLD)
+               gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs;
        else
-               gpu_metrics->average_gfxclk_frequency = metrics.AverageGfxclkFrequencyPreDs;
-       gpu_metrics->average_uclk_frequency = metrics.AverageUclkFrequencyPostDs;
-       gpu_metrics->average_vclk0_frequency = metrics.AverageVclk0Frequency;
-       gpu_metrics->average_dclk0_frequency = metrics.AverageDclk0Frequency;
-       gpu_metrics->average_vclk1_frequency = metrics.AverageVclk1Frequency;
-       gpu_metrics->average_dclk1_frequency = metrics.AverageDclk1Frequency;
+               gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs;
+       gpu_metrics->average_uclk_frequency = metrics->AverageUclkFrequencyPostDs;
+       gpu_metrics->average_vclk0_frequency = metrics->AverageVclk0Frequency;
+       gpu_metrics->average_dclk0_frequency = metrics->AverageDclk0Frequency;
+       gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency;
+       gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency;
 
-       gpu_metrics->current_gfxclk = metrics.CurrClock[PPCLK_GFXCLK];
-       gpu_metrics->current_socclk = metrics.CurrClock[PPCLK_SOCCLK];
-       gpu_metrics->current_uclk = metrics.CurrClock[PPCLK_UCLK];
-       gpu_metrics->current_vclk0 = metrics.CurrClock[PPCLK_VCLK_0];
-       gpu_metrics->current_dclk0 = metrics.CurrClock[PPCLK_DCLK_0];
-       gpu_metrics->current_vclk1 = metrics.CurrClock[PPCLK_VCLK_1];
-       gpu_metrics->current_dclk1 = metrics.CurrClock[PPCLK_DCLK_1];
+       gpu_metrics->current_gfxclk = metrics->CurrClock[PPCLK_GFXCLK];
+       gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK];
+       gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK];
+       gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0];
+       gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0];
+       gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_1];
+       gpu_metrics->current_dclk1 = metrics->CurrClock[PPCLK_DCLK_1];
 
-       gpu_metrics->throttle_status = metrics.ThrottlerStatus;
+       gpu_metrics->throttle_status = metrics->ThrottlerStatus;
 
-       gpu_metrics->current_fan_speed = metrics.CurrFanSpeed;
+       gpu_metrics->current_fan_speed = metrics->CurrFanSpeed;
 
        gpu_metrics->pcie_link_width =
                        smu_v11_0_get_current_pcie_link_width(smu);
@@ -2650,23 +2669,82 @@ static int sienna_cichlid_enable_mgpu_fan_boost(struct smu_context *smu)
 static int sienna_cichlid_gpo_control(struct smu_context *smu,
                                      bool enablement)
 {
+       uint32_t smu_version;
        int ret = 0;
 
+
        if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_DPM_GFX_GPO_BIT)) {
-               if (enablement)
-                       ret = smu_cmn_send_smc_msg_with_param(smu,
-                                                       SMU_MSG_SetGpoFeaturePMask,
-                                                       GFX_GPO_PACE_MASK | GFX_GPO_DEM_MASK,
-                                                       NULL);
-               else
-                       ret = smu_cmn_send_smc_msg_with_param(smu,
-                                                       SMU_MSG_SetGpoFeaturePMask,
-                                                       0,
-                                                       NULL);
+               ret = smu_cmn_get_smc_version(smu, NULL, &smu_version);
+               if (ret)
+                       return ret;
+
+               if (enablement) {
+                       if (smu_version < 0x003a2500) {
+                               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                                                     SMU_MSG_SetGpoFeaturePMask,
+                                                                     GFX_GPO_PACE_MASK | GFX_GPO_DEM_MASK,
+                                                                     NULL);
+                       } else {
+                               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                                                     SMU_MSG_DisallowGpo,
+                                                                     0,
+                                                                     NULL);
+                       }
+               } else {
+                       if (smu_version < 0x003a2500) {
+                               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                                                     SMU_MSG_SetGpoFeaturePMask,
+                                                                     0,
+                                                                     NULL);
+                       } else {
+                               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                                                     SMU_MSG_DisallowGpo,
+                                                                     1,
+                                                                     NULL);
+                       }
+               }
        }
 
        return ret;
 }
+
+static int sienna_cichlid_notify_2nd_usb20_port(struct smu_context *smu)
+{
+       uint32_t smu_version;
+       int ret = 0;
+
+       ret = smu_cmn_get_smc_version(smu, NULL, &smu_version);
+       if (ret)
+               return ret;
+
+       /*
+        * Message SMU_MSG_Enable2ndUSB20Port is supported by PMFW
+        * 58.45 and onwards.
+        */
+       if (smu_version < 0x003A2D00)
+               return 0;
+
+       return smu_cmn_send_smc_msg_with_param(smu,
+                                              SMU_MSG_Enable2ndUSB20Port,
+                                              smu->smu_table.boot_values.firmware_caps & ATOM_FIRMWARE_CAP_ENABLE_2ND_USB20PORT ?
+                                              1 : 0,
+                                              NULL);
+}
+
+static int sienna_cichlid_system_features_control(struct smu_context *smu,
+                                                 bool en)
+{
+       int ret = 0;
+
+       if (en) {
+               ret = sienna_cichlid_notify_2nd_usb20_port(smu);
+               if (ret)
+                       return ret;
+       }
+
+       return smu_v11_0_system_features_control(smu, en);
+}
+
 static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
        .get_allowed_feature_mask = sienna_cichlid_get_allowed_feature_mask,
        .set_default_dpm_table = sienna_cichlid_set_default_dpm_table,
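
A note on the firmware-version checks above: smu_cmn_get_smc_version() hands back the PMFW version packed into a u32, and judging from the cutoffs used here, major.minor.patch occupies the low three bytes. A minimal sketch of the encoding, with a hypothetical helper macro (the driver hardcodes the hex literals instead):

        #define PMFW_VER(maj, min, patch) \
                (((maj) << 16) | ((min) << 8) | (patch))

        /* PMFW_VER(58, 37, 0) == 0x003a2500 - GPO message cutover      */
        /* PMFW_VER(58, 45, 0) == 0x003A2D00 - Enable2ndUSB20Port gate  */

Firmware older than 58.37 keeps the legacy SMU_MSG_SetGpoFeaturePMask interface; newer firmware switches to SMU_MSG_DisallowGpo with inverted polarity (0 = allow GPO, 1 = disallow).
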
@@ -2707,7 +2785,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
        .set_driver_table_location = smu_v11_0_set_driver_table_location,
        .set_tool_table_location = smu_v11_0_set_tool_table_location,
        .notify_memory_pool_location = smu_v11_0_notify_memory_pool_location,
-       .system_features_control = smu_v11_0_system_features_control,
+       .system_features_control = sienna_cichlid_system_features_control,
        .send_smc_msg_with_param = smu_cmn_send_smc_msg_with_param,
        .send_smc_msg = smu_cmn_send_smc_msg,
        .init_display_count = NULL,
@@ -2740,6 +2818,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
        .get_dpm_ultimate_freq = sienna_cichlid_get_dpm_ultimate_freq,
        .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range,
        .run_btc = sienna_cichlid_run_btc,
+       .set_power_source = smu_v11_0_set_power_source,
        .get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
        .set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
        .get_gpu_metrics = sienna_cichlid_get_gpu_metrics,
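
Every metrics and activity-monitor hunk in this file follows the same pattern: the driver now allocates the *External variant of a firmware table and reads or writes through a pointer to the embedded payload, since the buffer shared with the SMU carries extra reserved space beyond the payload itself. A rough sketch of the idea; the authoritative layout lives in the sienna cichlid driver-if header, and the padding field below is illustrative:

        typedef struct {
                SmuMetrics_t SmuMetrics;        /* payload the driver uses */
                uint32_t     MmHubPadding[8];   /* reserved, SMU-internal  */
        } SmuMetricsExternal_t;

        SmuMetricsExternal_t metrics_external;
        SmuMetrics_t *metrics = &metrics_external.SmuMetrics;

        /* table transfers pass the full wrapper ...                */
        smu_cmn_get_metrics_table(smu, &metrics_external, true);
        /* ... while field accesses go through the payload pointer  */
        uint16_t sclk = metrics->AverageGfxclkFrequencyPostDs;

The same wrapping applies to DpmActivityMonitorCoeffIntExternal_t in the power-profile getter and setter above.
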
index 57e120c..38cd0ec 100644 (file)
@@ -29,6 +29,10 @@ typedef enum {
   POWER_SOURCE_COUNT,
 } POWER_SOURCE_e;
 
+#define SIENNA_CICHLID_UMD_PSTATE_PROFILING_GFXCLK    1825
+#define SIENNA_CICHLID_UMD_PSTATE_PROFILING_SOCCLK    960
+#define SIENNA_CICHLID_UMD_PSTATE_PROFILING_MEMCLK    1000
+
 extern void sienna_cichlid_set_ppt_funcs(struct smu_context *smu);
 
 #endif
index 624065d..b279dbb 100644 (file)
@@ -91,6 +91,11 @@ int smu_v11_0_init_microcode(struct smu_context *smu)
        const struct common_firmware_header *header;
        struct amdgpu_firmware_info *ucode = NULL;
 
+       if (amdgpu_sriov_vf(adev) &&
+                       ((adev->asic_type == CHIP_NAVI12) ||
+                        (adev->asic_type == CHIP_SIENNA_CICHLID)))
+               return 0;
+
        switch (adev->asic_type) {
        case CHIP_ARCTURUS:
                chip_name = "arcturus";
@@ -554,6 +559,7 @@ int smu_v11_0_get_vbios_bootup_values(struct smu_context *smu)
                smu->smu_table.boot_values.vdd_gfx = v_3_1->bootup_vddgfx_mv;
                smu->smu_table.boot_values.cooling_id = v_3_1->coolingsolution_id;
                smu->smu_table.boot_values.pp_table_id = 0;
+               smu->smu_table.boot_values.firmware_caps = v_3_1->firmware_capability;
                break;
        case 3:
        default:
@@ -569,6 +575,7 @@ int smu_v11_0_get_vbios_bootup_values(struct smu_context *smu)
                smu->smu_table.boot_values.vdd_gfx = v_3_3->bootup_vddgfx_mv;
                smu->smu_table.boot_values.cooling_id = v_3_3->coolingsolution_id;
                smu->smu_table.boot_values.pp_table_id = v_3_3->pplib_pptable_id;
+               smu->smu_table.boot_values.firmware_caps = v_3_3->firmware_capability;
        }
 
        smu->smu_table.boot_values.format_revision = header->format_revision;
@@ -929,9 +936,13 @@ int smu_v11_0_get_current_power_limit(struct smu_context *smu,
        if (power_src < 0)
                return -EINVAL;
 
+       /*
+        * BIT 24-31: ControllerId (only PPT0 is supported for now)
+        * BIT 16-23: PowerSource
+        */
        ret = smu_cmn_send_smc_msg_with_param(smu,
                                          SMU_MSG_GetPptLimit,
-                                         power_src << 16,
+                                         (0 << 24) | (power_src << 16),
                                          power_limit);
        if (ret)
                dev_err(smu->adev->dev, "[%s] get PPT limit failed!", __func__);
@@ -941,6 +952,7 @@ int smu_v11_0_get_current_power_limit(struct smu_context *smu,
 
 int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)
 {
+       int power_src;
        int ret = 0;
 
        if (!smu_cmn_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) {
@@ -948,6 +960,22 @@ int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n)
                return -EOPNOTSUPP;
        }
 
+       power_src = smu_cmn_to_asic_specific_index(smu,
+                                       CMN2ASIC_MAPPING_PWR,
+                                       smu->adev->pm.ac_power ?
+                                       SMU_POWER_SOURCE_AC :
+                                       SMU_POWER_SOURCE_DC);
+       if (power_src < 0)
+               return -EINVAL;
+
+       /*
+        * BIT 24-31: ControllerId (only PPT0 is supported for now)
+        * BIT 16-23: PowerSource
+        * BIT 0-15: PowerLimit
+        */
+       n &= 0xFFFF;
+       n |= 0 << 24;
+       n |= (power_src) << 16;
        ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetPptLimit, n, NULL);
        if (ret) {
                dev_err(smu->adev->dev, "[%s] Set power limit Failed!\n", __func__);
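
Both PPT-limit hunks above pack the same message-argument layout. Worked through with illustrative values (power_src = 1 for DC, a 220 W limit):

        /* BIT 24-31 ControllerId | BIT 16-23 PowerSource | BIT 0-15 Limit */
        uint32_t arg = (0 << 24) | (1 << 16) | (220 & 0xFFFF);
        /* arg == 0x000100DC: controller PPT0, DC power source, 220 W      */

GetPptLimit sends only the upper two fields; SetPptLimit additionally carries the limit in the low 16 bits.
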
@@ -2064,6 +2092,22 @@ int smu_v11_0_deep_sleep_control(struct smu_context *smu,
                }
        }
 
+       if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_DS_UCLK_BIT)) {
+               ret = smu_cmn_feature_set_enabled(smu, SMU_FEATURE_DS_UCLK_BIT, enablement);
+               if (ret) {
+                       dev_err(adev->dev, "Failed to %s UCLK DS!\n", enablement ? "enable" : "disable");
+                       return ret;
+               }
+       }
+
+       if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_DS_FCLK_BIT)) {
+               ret = smu_cmn_feature_set_enabled(smu, SMU_FEATURE_DS_FCLK_BIT, enablement);
+               if (ret) {
+                       dev_err(adev->dev, "Failed to %s FCLK DS!\n", enablement ? "enable" : "disable");
+                       return ret;
+               }
+       }
+
        if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_DS_SOCCLK_BIT)) {
                ret = smu_cmn_feature_set_enabled(smu, SMU_FEATURE_DS_SOCCLK_BIT, enablement);
                if (ret) {
index a81e5c8..9bccf2a 100644 (file)
@@ -64,7 +64,7 @@ static struct cmn2asic_msg_mapping vangogh_message_map[SMU_MSG_MAX_COUNT] = {
        MSG_MAP(PowerUpIspByTile,               PPSMC_MSG_PowerUpIspByTile,             0),
        MSG_MAP(PowerDownVcn,                   PPSMC_MSG_PowerDownVcn,                 0),
        MSG_MAP(PowerUpVcn,                     PPSMC_MSG_PowerUpVcn,                   0),
-       MSG_MAP(Spare,                          PPSMC_MSG_spare,                                0),
+       MSG_MAP(RlcPowerNotify,                 PPSMC_MSG_RlcPowerNotify,               0),
        MSG_MAP(SetHardMinVcn,                  PPSMC_MSG_SetHardMinVcn,                0),
        MSG_MAP(SetSoftMinGfxclk,               PPSMC_MSG_SetSoftMinGfxclk,             0),
        MSG_MAP(ActiveProcessNotify,            PPSMC_MSG_ActiveProcessNotify,          0),
@@ -722,6 +722,12 @@ static int vangogh_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
        return 0;
 }
 
+static int vangogh_system_features_control(struct smu_context *smu, bool en)
+{
+       return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify,
+                                       en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL);
+}
+
 static const struct pptable_funcs vangogh_ppt_funcs = {
 
        .check_fw_status = smu_v11_0_check_fw_status,
@@ -749,6 +755,7 @@ static const struct pptable_funcs vangogh_ppt_funcs = {
        .print_clk_levels = vangogh_print_fine_grain_clk,
        .set_default_dpm_table = vangogh_set_default_dpm_tables,
        .set_fine_grain_gfx_freq_parameters = vangogh_set_fine_grain_gfx_freq_parameters,
+       .system_features_control = vangogh_system_features_control,
 };
 
 void vangogh_set_ppt_funcs(struct smu_context *smu)
index 8756766..eab4554 100644 (file)
@@ -32,4 +32,8 @@ extern void vangogh_set_ppt_funcs(struct smu_context *smu);
 #define VANGOGH_UMD_PSTATE_SOCCLK       678
 #define VANGOGH_UMD_PSTATE_FCLK         800
 
+/* RLC Power Status */
+#define RLC_STATUS_OFF          0
+#define RLC_STATUS_NORMAL       1
+
 #endif
index ddd0e32..ba15070 100644 (file)
@@ -122,7 +122,8 @@ static int handle_conflicting_encoders(struct drm_atomic_state *state,
                        continue;
 
                if (funcs->atomic_best_encoder)
-                       new_encoder = funcs->atomic_best_encoder(connector, new_conn_state);
+                       new_encoder = funcs->atomic_best_encoder(connector,
+                                                                state);
                else if (funcs->best_encoder)
                        new_encoder = funcs->best_encoder(connector);
                else
@@ -345,8 +346,7 @@ update_connector_routing(struct drm_atomic_state *state,
        funcs = connector->helper_private;
 
        if (funcs->atomic_best_encoder)
-               new_encoder = funcs->atomic_best_encoder(connector,
-                                                        new_connector_state);
+               new_encoder = funcs->atomic_best_encoder(connector, state);
        else if (funcs->best_encoder)
                new_encoder = funcs->best_encoder(connector);
        else
@@ -1313,7 +1313,7 @@ static void drm_atomic_helper_commit_writebacks(struct drm_device *dev,
 
                if (new_conn_state->writeback_job && new_conn_state->writeback_job->fb) {
                        WARN_ON(connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK);
-                       funcs->atomic_commit(connector, new_conn_state);
+                       funcs->atomic_commit(connector, old_state);
                }
        }
 }
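
The two hunks above are the helper-side half of an API change: &drm_connector_helper_funcs.atomic_best_encoder now receives the full &drm_atomic_state rather than a single connector state, and each callback looks up the state it needs (the i915 MST hunk further down is a real conversion). A minimal sketch of a ported driver callback, with hypothetical foo_* names:

        #include <drm/drm_atomic.h>

        static struct drm_encoder *
        foo_atomic_best_encoder(struct drm_connector *connector,
                                struct drm_atomic_state *state)
        {
                struct drm_connector_state *conn_state =
                        drm_atomic_get_new_connector_state(state, connector);

                if (!conn_state || !conn_state->crtc)
                        return NULL;

                /* pick the encoder for the CRTC in the new state */
                return &to_foo_connector(connector)->encoder;
        }
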
index ae2234a..5c2141e 100644 (file)
  * exposed and assumed to be black).
  *
  * SCALING_FILTER:
- *
  *     Indicates scaling filter to be used for plane scaler
  *
  *     The value of this property can be one of the following:
+ *
  *     Default:
  *             Driver's default scaling filter
  *     Nearest Neighbor:
index 7a01d09..aeb1327 100644 (file)
@@ -77,6 +77,7 @@ static struct drm_map_list *drm_find_matching_map(struct drm_device *dev,
                        if ((entry->map->offset & 0xffffffff) ==
                            (map->offset & 0xffffffff))
                                return entry;
+                       break;
                default: /* Make gcc happy */
                        ;
                }
index fe573ac..ce45e38 100644 (file)
@@ -314,9 +314,6 @@ drm_client_buffer_vmap(struct drm_client_buffer *buffer, struct dma_buf_map *map
        struct dma_buf_map *map = &buffer->map;
        int ret;
 
-       if (dma_buf_map_is_set(map))
-               goto out;
-
        /*
         * FIXME: The dependency on GEM here isn't required, we could
         * convert the driver handle to a dma-buf instead and use the
@@ -329,7 +326,6 @@ drm_client_buffer_vmap(struct drm_client_buffer *buffer, struct dma_buf_map *map
        if (ret)
                return ret;
 
-out:
        *map_copy = *map;
 
        return 0;
index f927976..74090fc 100644 (file)
@@ -230,14 +230,14 @@ struct dma_fence *drm_crtc_create_fence(struct drm_crtc *crtc)
  *
  *     Setting MODE_ID to 0 will release reserved resources for the CRTC.
  * SCALING_FILTER:
- *     Atomic property for setting the scaling filter for CRTC scaler
+ *     Atomic property for setting the scaling filter for CRTC scaler
  *
- *     The value of this property can be one of the following:
- *     Default:
- *             Driver's default scaling filter
- *     Nearest Neighbor:
- *             Nearest Neighbor scaling filter
+ *     The value of this property can be one of the following:
  *
+ *     Default:
+ *             Driver's default scaling filter
+ *     Nearest Neighbor:
+ *             Nearest Neighbor scaling filter
  */
 
 /**
index 25edf67..4b81195 100644 (file)
@@ -371,9 +371,9 @@ static void drm_fb_helper_resume_worker(struct work_struct *work)
        console_unlock();
 }
 
-static void drm_fb_helper_dirty_blit_real(struct drm_fb_helper *fb_helper,
-                                         struct drm_clip_rect *clip,
-                                         struct dma_buf_map *dst)
+static void drm_fb_helper_damage_blit_real(struct drm_fb_helper *fb_helper,
+                                          struct drm_clip_rect *clip,
+                                          struct dma_buf_map *dst)
 {
        struct drm_framebuffer *fb = fb_helper->fb;
        unsigned int cpp = fb->format->cpp[0];
@@ -391,40 +391,86 @@ static void drm_fb_helper_dirty_blit_real(struct drm_fb_helper *fb_helper,
        }
 }
 
-static void drm_fb_helper_dirty_work(struct work_struct *work)
+static int drm_fb_helper_damage_blit(struct drm_fb_helper *fb_helper,
+                                    struct drm_clip_rect *clip)
+{
+       struct drm_client_buffer *buffer = fb_helper->buffer;
+       struct dma_buf_map map, dst;
+       int ret;
+
+       /*
+        * We have to pin the client buffer to its current location while
+        * flushing the shadow buffer. In the general case, concurrent
+        * modesetting operations could try to move the buffer and would
+        * fail. The modeset has to be serialized by acquiring the reservation
+        * object of the underlying BO here.
+        *
+        * For fbdev emulation, we only have to protect against fbdev modeset
+        * operations. Nothing else will involve the client buffer's BO. So it
+        * is sufficient to acquire struct drm_fb_helper.lock here.
+        */
+       mutex_lock(&fb_helper->lock);
+
+       ret = drm_client_buffer_vmap(buffer, &map);
+       if (ret)
+               goto out;
+
+       dst = map;
+       drm_fb_helper_damage_blit_real(fb_helper, clip, &dst);
+
+       drm_client_buffer_vunmap(buffer);
+
+out:
+       mutex_unlock(&fb_helper->lock);
+
+       return ret;
+}
+
+static void drm_fb_helper_damage_work(struct work_struct *work)
 {
        struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper,
-                                                   dirty_work);
-       struct drm_clip_rect *clip = &helper->dirty_clip;
+                                                   damage_work);
+       struct drm_device *dev = helper->dev;
+       struct drm_clip_rect *clip = &helper->damage_clip;
        struct drm_clip_rect clip_copy;
        unsigned long flags;
-       struct dma_buf_map map;
        int ret;
 
-       spin_lock_irqsave(&helper->dirty_lock, flags);
+       spin_lock_irqsave(&helper->damage_lock, flags);
        clip_copy = *clip;
        clip->x1 = clip->y1 = ~0;
        clip->x2 = clip->y2 = 0;
-       spin_unlock_irqrestore(&helper->dirty_lock, flags);
+       spin_unlock_irqrestore(&helper->damage_lock, flags);
 
-       /* call dirty callback only when it has been really touched */
-       if (clip_copy.x1 < clip_copy.x2 && clip_copy.y1 < clip_copy.y2) {
-
-               /* Generic fbdev uses a shadow buffer */
-               if (helper->buffer) {
-                       ret = drm_client_buffer_vmap(helper->buffer, &map);
-                       if (ret)
-                               return;
-                       drm_fb_helper_dirty_blit_real(helper, &clip_copy, &map);
-               }
+       /* Call damage handlers only if necessary */
+       if (!(clip_copy.x1 < clip_copy.x2 && clip_copy.y1 < clip_copy.y2))
+               return;
 
-               if (helper->fb->funcs->dirty)
-                       helper->fb->funcs->dirty(helper->fb, NULL, 0, 0,
-                                                &clip_copy, 1);
+       if (helper->buffer) {
+               ret = drm_fb_helper_damage_blit(helper, &clip_copy);
+               if (drm_WARN_ONCE(dev, ret, "Damage blitter failed: ret=%d\n", ret))
+                       goto err;
+       }
 
-               if (helper->buffer)
-                       drm_client_buffer_vunmap(helper->buffer);
+       if (helper->fb->funcs->dirty) {
+               ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1);
+               if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret))
+                       goto err;
        }
+
+       return;
+
+err:
+       /*
+        * Restore damage clip rectangle on errors. The next run
+        * of the damage worker will perform the update.
+        */
+       spin_lock_irqsave(&helper->damage_lock, flags);
+       clip->x1 = min_t(u32, clip->x1, clip_copy.x1);
+       clip->y1 = min_t(u32, clip->y1, clip_copy.y1);
+       clip->x2 = max_t(u32, clip->x2, clip_copy.x2);
+       clip->y2 = max_t(u32, clip->y2, clip_copy.y2);
+       spin_unlock_irqrestore(&helper->damage_lock, flags);
 }
 
 /**
@@ -440,10 +486,10 @@ void drm_fb_helper_prepare(struct drm_device *dev, struct drm_fb_helper *helper,
                           const struct drm_fb_helper_funcs *funcs)
 {
        INIT_LIST_HEAD(&helper->kernel_fb_list);
-       spin_lock_init(&helper->dirty_lock);
+       spin_lock_init(&helper->damage_lock);
        INIT_WORK(&helper->resume_work, drm_fb_helper_resume_worker);
-       INIT_WORK(&helper->dirty_work, drm_fb_helper_dirty_work);
-       helper->dirty_clip.x1 = helper->dirty_clip.y1 = ~0;
+       INIT_WORK(&helper->damage_work, drm_fb_helper_damage_work);
+       helper->damage_clip.x1 = helper->damage_clip.y1 = ~0;
        mutex_init(&helper->lock);
        helper->funcs = funcs;
        helper->dev = dev;
@@ -579,7 +625,7 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
                return;
 
        cancel_work_sync(&fb_helper->resume_work);
-       cancel_work_sync(&fb_helper->dirty_work);
+       cancel_work_sync(&fb_helper->damage_work);
 
        info = fb_helper->fbdev;
        if (info) {
@@ -614,30 +660,30 @@ static bool drm_fbdev_use_shadow_fb(struct drm_fb_helper *fb_helper)
               fb->funcs->dirty;
 }
 
-static void drm_fb_helper_dirty(struct fb_info *info, u32 x, u32 y,
-                               u32 width, u32 height)
+static void drm_fb_helper_damage(struct fb_info *info, u32 x, u32 y,
+                                u32 width, u32 height)
 {
        struct drm_fb_helper *helper = info->par;
-       struct drm_clip_rect *clip = &helper->dirty_clip;
+       struct drm_clip_rect *clip = &helper->damage_clip;
        unsigned long flags;
 
        if (!drm_fbdev_use_shadow_fb(helper))
                return;
 
-       spin_lock_irqsave(&helper->dirty_lock, flags);
+       spin_lock_irqsave(&helper->damage_lock, flags);
        clip->x1 = min_t(u32, clip->x1, x);
        clip->y1 = min_t(u32, clip->y1, y);
        clip->x2 = max_t(u32, clip->x2, x + width);
        clip->y2 = max_t(u32, clip->y2, y + height);
-       spin_unlock_irqrestore(&helper->dirty_lock, flags);
+       spin_unlock_irqrestore(&helper->damage_lock, flags);
 
-       schedule_work(&helper->dirty_work);
+       schedule_work(&helper->damage_work);
 }
 
 /**
  * drm_fb_helper_deferred_io() - fbdev deferred_io callback function
  * @info: fb_info struct pointer
- * @pagelist: list of dirty mmap framebuffer pages
+ * @pagelist: list of mmap framebuffer pages that have to be flushed
  *
  * This function is used as the &fb_deferred_io.deferred_io
  * callback function for flushing the fbdev mmap writes.
@@ -662,7 +708,7 @@ void drm_fb_helper_deferred_io(struct fb_info *info,
                y1 = min / info->fix.line_length;
                y2 = min_t(u32, DIV_ROUND_UP(max, info->fix.line_length),
                           info->var.yres);
-               drm_fb_helper_dirty(info, 0, y1, info->var.xres, y2 - y1);
+               drm_fb_helper_damage(info, 0, y1, info->var.xres, y2 - y1);
        }
 }
 EXPORT_SYMBOL(drm_fb_helper_deferred_io);
@@ -699,8 +745,7 @@ ssize_t drm_fb_helper_sys_write(struct fb_info *info, const char __user *buf,
 
        ret = fb_sys_write(info, buf, count, ppos);
        if (ret > 0)
-               drm_fb_helper_dirty(info, 0, 0, info->var.xres,
-                                   info->var.yres);
+               drm_fb_helper_damage(info, 0, 0, info->var.xres, info->var.yres);
 
        return ret;
 }
@@ -717,8 +762,7 @@ void drm_fb_helper_sys_fillrect(struct fb_info *info,
                                const struct fb_fillrect *rect)
 {
        sys_fillrect(info, rect);
-       drm_fb_helper_dirty(info, rect->dx, rect->dy,
-                           rect->width, rect->height);
+       drm_fb_helper_damage(info, rect->dx, rect->dy, rect->width, rect->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_sys_fillrect);
 
@@ -733,8 +777,7 @@ void drm_fb_helper_sys_copyarea(struct fb_info *info,
                                const struct fb_copyarea *area)
 {
        sys_copyarea(info, area);
-       drm_fb_helper_dirty(info, area->dx, area->dy,
-                           area->width, area->height);
+       drm_fb_helper_damage(info, area->dx, area->dy, area->width, area->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_sys_copyarea);
 
@@ -749,8 +792,7 @@ void drm_fb_helper_sys_imageblit(struct fb_info *info,
                                 const struct fb_image *image)
 {
        sys_imageblit(info, image);
-       drm_fb_helper_dirty(info, image->dx, image->dy,
-                           image->width, image->height);
+       drm_fb_helper_damage(info, image->dx, image->dy, image->width, image->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_sys_imageblit);
 
@@ -765,8 +807,7 @@ void drm_fb_helper_cfb_fillrect(struct fb_info *info,
                                const struct fb_fillrect *rect)
 {
        cfb_fillrect(info, rect);
-       drm_fb_helper_dirty(info, rect->dx, rect->dy,
-                           rect->width, rect->height);
+       drm_fb_helper_damage(info, rect->dx, rect->dy, rect->width, rect->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_cfb_fillrect);
 
@@ -781,8 +822,7 @@ void drm_fb_helper_cfb_copyarea(struct fb_info *info,
                                const struct fb_copyarea *area)
 {
        cfb_copyarea(info, area);
-       drm_fb_helper_dirty(info, area->dx, area->dy,
-                           area->width, area->height);
+       drm_fb_helper_damage(info, area->dx, area->dy, area->width, area->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_cfb_copyarea);
 
@@ -797,8 +837,7 @@ void drm_fb_helper_cfb_imageblit(struct fb_info *info,
                                 const struct fb_image *image)
 {
        cfb_imageblit(info, image);
-       drm_fb_helper_dirty(info, image->dx, image->dy,
-                           image->width, image->height);
+       drm_fb_helper_damage(info, image->dx, image->dy, image->width, image->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_cfb_imageblit);
 
@@ -1988,14 +2027,19 @@ static void drm_fbdev_cleanup(struct drm_fb_helper *fb_helper)
        if (!fb_helper->dev)
                return;
 
-       if (fbi && fbi->fbdefio) {
-               fb_deferred_io_cleanup(fbi);
-               shadow = fbi->screen_buffer;
+       if (fbi) {
+               if (fbi->fbdefio)
+                       fb_deferred_io_cleanup(fbi);
+               if (drm_fbdev_use_shadow_fb(fb_helper))
+                       shadow = fbi->screen_buffer;
        }
 
        drm_fb_helper_fini(fb_helper);
 
-       vfree(shadow);
+       if (shadow)
+               vfree(shadow);
+       else
+               drm_client_buffer_vunmap(fb_helper->buffer);
 
        drm_client_framebuffer_delete(fb_helper->buffer);
 }
@@ -2189,6 +2233,9 @@ static ssize_t drm_fbdev_fb_write(struct fb_info *info, const char __user *buf,
        if (ret > 0)
                *ppos += ret;
 
+       if (ret > 0)
+               drm_fb_helper_damage(info, 0, 0, info->var.xres_virtual, info->var.yres_virtual);
+
        return ret ? ret : err;
 }
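
Throughout this file the pending damage rectangle relies on an "empty" sentinel of x1 = y1 = ~0, x2 = y2 = 0: min()/max() accumulation then grows the box from either direction, and the x1 < x2 && y1 < y2 test in the worker doubles as the "anything to flush?" check. Accumulating a single 16x16 update at (32, 64), for example:

        struct drm_clip_rect clip = { .x1 = ~0, .y1 = ~0 };    /* empty */

        clip.x1 = min_t(u32, clip.x1, 32);              /* -> 32 */
        clip.y1 = min_t(u32, clip.y1, 64);              /* -> 64 */
        clip.x2 = max_t(u32, clip.x2, 32 + 16);         /* -> 48 */
        clip.y2 = max_t(u32, clip.y2, 64 + 16);         /* -> 80 */

The error path added to drm_fb_helper_damage_work() runs the same min/max merge in reverse, folding the failed rectangle back into the pending clip so the next worker run retries it.
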
 
index 499189c..9825c37 100644 (file)
@@ -51,13 +51,17 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private)
        if (!obj)
                return ERR_PTR(-ENOMEM);
 
+       shmem = to_drm_gem_shmem_obj(obj);
+
        if (!obj->funcs)
                obj->funcs = &drm_gem_shmem_funcs;
 
-       if (private)
+       if (private) {
                drm_gem_private_object_init(dev, obj, size);
-       else
+               shmem->map_wc = false; /* dma-buf mappings always use writecombine */
+       } else {
                ret = drm_gem_object_init(dev, obj, size);
+       }
        if (ret)
                goto err_free;
 
@@ -65,7 +69,6 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private)
        if (ret)
                goto err_release;
 
-       shmem = to_drm_gem_shmem_obj(obj);
        mutex_init(&shmem->pages_lock);
        mutex_init(&shmem->vmap_lock);
        INIT_LIST_HEAD(&shmem->madv_list);
@@ -284,7 +287,7 @@ static int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem, struct
                if (ret)
                        goto err_zero_use;
 
-               if (!shmem->map_cached)
+               if (shmem->map_wc)
                        prot = pgprot_writecombine(prot);
                shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
                                    VM_MAP, prot);
@@ -477,33 +480,6 @@ bool drm_gem_shmem_purge(struct drm_gem_object *obj)
 EXPORT_SYMBOL(drm_gem_shmem_purge);
 
 /**
- * drm_gem_shmem_create_object_cached - Create a shmem buffer object with
- *                                      cached mappings
- * @dev: DRM device
- * @size: Size of the object to allocate
- *
- * By default, shmem buffer objects use writecombine mappings. This
- * function implements struct drm_driver.gem_create_object for shmem
- * buffer objects with cached mappings.
- *
- * Returns:
- * A struct drm_gem_shmem_object * on success or NULL negative on failure.
- */
-struct drm_gem_object *
-drm_gem_shmem_create_object_cached(struct drm_device *dev, size_t size)
-{
-       struct drm_gem_shmem_object *shmem;
-
-       shmem = kzalloc(sizeof(*shmem), GFP_KERNEL);
-       if (!shmem)
-               return NULL;
-       shmem->map_cached = true;
-
-       return &shmem->base;
-}
-EXPORT_SYMBOL(drm_gem_shmem_create_object_cached);
-
-/**
  * drm_gem_shmem_dumb_create - Create a dumb shmem buffer object
  * @file: DRM file structure to create the dumb buffer for
  * @dev: DRM device
@@ -626,7 +602,7 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
 
        vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
-       if (!shmem->map_cached)
+       if (shmem->map_wc)
                vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
        vma->vm_ops = &drm_gem_shmem_vm_ops;
 
index bbd2354..6d38c5c 100644 (file)
@@ -145,10 +145,8 @@ static int etnaviv_gem_mmap_obj(struct etnaviv_gem_object *etnaviv_obj,
                 * address_space (so unmap_mapping_range does what we want,
                 * in particular in the case of mmap'd dmabufs)
                 */
-               fput(vma->vm_file);
-               get_file(etnaviv_obj->base.filp);
                vma->vm_pgoff = 0;
-               vma->vm_file  = etnaviv_obj->base.filp;
+               vma_set_file(vma, etnaviv_obj->base.filp);
 
                vma->vm_page_prot = vm_page_prot;
        }
index 0c86846..27f04ae 100644 (file)
@@ -23,6 +23,7 @@
  *
  */
 
+#include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_probe_helper.h>
@@ -719,11 +720,13 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
 }
 
 static struct drm_encoder *intel_mst_atomic_best_encoder(struct drm_connector *connector,
-                                                        struct drm_connector_state *state)
+                                                        struct drm_atomic_state *state)
 {
+       struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state,
+                                                                                        connector);
        struct intel_connector *intel_connector = to_intel_connector(connector);
        struct intel_dp *intel_dp = intel_connector->mst_port;
-       struct intel_crtc *crtc = to_intel_crtc(state->crtc);
+       struct intel_crtc *crtc = to_intel_crtc(connector_state->crtc);
 
        return &intel_dp->mst_encoders[crtc->pipe]->base.base;
 }
index 0dd477e..04e9c04 100644 (file)
@@ -114,8 +114,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
        if (ret)
                return ret;
 
-       fput(vma->vm_file);
-       vma->vm_file = get_file(obj->base.filp);
+       vma_set_file(vma, obj->base.filp);
 
        return 0;
 }
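
The etnaviv and i915 hunks replace the open-coded fput()/get_file() dance with vma_set_file(), a helper introduced this cycle that swaps vm_file with the reference counting done in a safe order; roughly (a sketch, not a verbatim copy):

        void vma_set_file(struct vm_area_struct *vma, struct file *file)
        {
                /* changing an anonymous vma with this is illegal */
                get_file(file);         /* take a ref on the new file */
                swap(vma->vm_file, file);
                fput(file);             /* drop the ref on the old one */
        }

Note the i915 mmap hunk further down still fput()s its anon file afterwards: vma_set_file() takes its own reference, so the initial creation reference has to be dropped, as the added comment spells out.
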
index b07dc11..bcc80f4 100644 (file)
@@ -382,7 +382,7 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
                return true;
 
        if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
-           (vma->node.start + vma->node.size - 1) >> 32)
+           (vma->node.start + vma->node.size + 4095) >> 32)
                return true;
 
        if (flags & __EXEC_OBJECT_NEEDS_MAP &&
index 3d69e51..ec28a6c 100644 (file)
@@ -893,8 +893,9 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
         * requires avoiding extraneous references to their filp, hence why
         * we prefer to use an anonymous file for their mmaps.
         */
-       fput(vma->vm_file);
-       vma->vm_file = anon;
+       vma_set_file(vma, anon);
+       /* Drop the initial creation reference, the vma is now holding one. */
+       fput(anon);
 
        switch (mmo->mmap_type) {
        case I915_MMAP_TYPE_WC:
index 15be8de..0a3ee4f 100644 (file)
@@ -1579,9 +1579,9 @@ static inline const struct i915_rev_steppings *
 tgl_revids_get(struct drm_i915_private *dev_priv)
 {
        if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv))
-               return tgl_uy_revids;
+               return &tgl_uy_revids[INTEL_REVID(dev_priv)];
        else
-               return tgl_revids;
+               return &tgl_revids[INTEL_REVID(dev_priv)];
 }
 
 #define IS_TGL_DISP_REVID(p, since, until) \
@@ -1591,14 +1591,14 @@ tgl_revids_get(struct drm_i915_private *dev_priv)
 
 #define IS_TGL_UY_GT_REVID(p, since, until) \
        ((IS_TGL_U(p) || IS_TGL_Y(p)) && \
-        tgl_uy_revids->gt_stepping >= (since) && \
-        tgl_uy_revids->gt_stepping <= (until))
+        tgl_uy_revids[INTEL_REVID(p)].gt_stepping >= (since) && \
+        tgl_uy_revids[INTEL_REVID(p)].gt_stepping <= (until))
 
 #define IS_TGL_GT_REVID(p, since, until) \
        (IS_TIGERLAKE(p) && \
         !(IS_TGL_U(p) || IS_TGL_Y(p)) && \
-        tgl_revids->gt_stepping >= (since) && \
-        tgl_revids->gt_stepping <= (until))
+        tgl_revids[INTEL_REVID(p)].gt_stepping >= (since) && \
+        tgl_revids[INTEL_REVID(p)].gt_stepping <= (until))
 
 #define RKL_REVID_A0           0x0
 #define RKL_REVID_B0           0x1
index dc6febc..c80eeac 100644 (file)
@@ -4242,18 +4242,21 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
         */
        dev_priv->hotplug.hpd_short_storm_enabled = !HAS_DP_MST(dev_priv);
 
-       if (HAS_PCH_DG1(dev_priv))
-               dev_priv->display.hpd_irq_setup = dg1_hpd_irq_setup;
-       else if (INTEL_GEN(dev_priv) >= 11)
-               dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup;
-       else if (IS_GEN9_LP(dev_priv))
-               dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup;
-       else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT)
-               dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup;
-       else if (HAS_GMCH(dev_priv) && I915_HAS_HOTPLUG(dev_priv))
-               dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
-       else
-               dev_priv->display.hpd_irq_setup = ilk_hpd_irq_setup;
+       if (HAS_GMCH(dev_priv)) {
+               if (I915_HAS_HOTPLUG(dev_priv))
+                       dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
+       } else {
+               if (HAS_PCH_DG1(dev_priv))
+                       dev_priv->display.hpd_irq_setup = dg1_hpd_irq_setup;
+               else if (INTEL_GEN(dev_priv) >= 11)
+                       dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup;
+               else if (IS_GEN9_LP(dev_priv))
+                       dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup;
+               else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT)
+                       dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup;
+               else
+                       dev_priv->display.hpd_irq_setup = ilk_hpd_irq_setup;
+       }
 }
 
 /**
index 3b12c8f..649c265 100644 (file)
@@ -914,7 +914,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
                intel_uncore_rmw(uncore, oastatus_reg,
                                 GEN8_OASTATUS_COUNTER_OVERFLOW |
                                 GEN8_OASTATUS_REPORT_LOST,
-                                IS_GEN_RANGE(uncore->i915, 8, 10) ?
+                                IS_GEN_RANGE(uncore->i915, 8, 11) ?
                                 (GEN8_OASTATUS_HEAD_POINTER_WRAP |
                                  GEN8_OASTATUS_TAIL_POINTER_WRAP) : 0);
        }
index c642ae1..1e58227 100644 (file)
@@ -7,6 +7,7 @@
 #define __DCSS_PRV_H__
 
 #include <drm/drm_fourcc.h>
+#include <drm/drm_plane.h>
 #include <linux/io.h>
 #include <video/videomode.h>
 
@@ -165,6 +166,8 @@ void dcss_ss_sync_set(struct dcss_ss *ss, struct videomode *vm,
 /* SCALER */
 int dcss_scaler_init(struct dcss_dev *dcss, unsigned long scaler_base);
 void dcss_scaler_exit(struct dcss_scaler *scl);
+void dcss_scaler_set_filter(struct dcss_scaler *scl, int ch_num,
+                           enum drm_scaling_filter scaling_filter);
 void dcss_scaler_setup(struct dcss_scaler *scl, int ch_num,
                       const struct drm_format_info *format,
                       int src_xres, int src_yres, int dst_xres, int dst_yres,
index e13652e..03ba88f 100644 (file)
@@ -103,15 +103,15 @@ static bool dcss_plane_can_rotate(const struct drm_format_info *format,
                                  bool mod_present, u64 modifier,
                                  unsigned int rotation)
 {
-       bool linear_format = !mod_present ||
-                            (mod_present && modifier == DRM_FORMAT_MOD_LINEAR);
+       bool linear_format = !mod_present || modifier == DRM_FORMAT_MOD_LINEAR;
        u32 supported_rotation = DRM_MODE_ROTATE_0;
 
        if (!format->is_yuv && linear_format)
                supported_rotation = DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_180 |
                                     DRM_MODE_REFLECT_MASK;
        else if (!format->is_yuv &&
-                modifier == DRM_FORMAT_MOD_VIVANTE_TILED)
+                (modifier == DRM_FORMAT_MOD_VIVANTE_TILED ||
+                 modifier == DRM_FORMAT_MOD_VIVANTE_SUPER_TILED))
                supported_rotation = DRM_MODE_ROTATE_MASK |
                                     DRM_MODE_REFLECT_MASK;
        else if (format->is_yuv && linear_format &&
@@ -257,7 +257,8 @@ static bool dcss_plane_needs_setup(struct drm_plane_state *state,
               state->src_h  != old_state->src_h  ||
               fb->format->format != old_fb->format->format ||
               fb->modifier  != old_fb->modifier ||
-              state->rotation != old_state->rotation;
+              state->rotation != old_state->rotation ||
+              state->scaling_filter != old_state->scaling_filter;
 }
 
 static void dcss_plane_atomic_update(struct drm_plane *plane,
@@ -272,6 +273,7 @@ static void dcss_plane_atomic_update(struct drm_plane *plane,
        u32 src_w, src_h, dst_w, dst_h;
        struct drm_rect src, dst;
        bool enable = true;
+       bool is_rotation_90_or_270;
 
        if (!fb || !state->crtc || !state->visible)
                return;
@@ -309,8 +311,16 @@ static void dcss_plane_atomic_update(struct drm_plane *plane,
 
        dcss_plane_atomic_set_base(dcss_plane);
 
+       is_rotation_90_or_270 = state->rotation & (DRM_MODE_ROTATE_90 |
+                                                  DRM_MODE_ROTATE_270);
+
+       dcss_scaler_set_filter(dcss->scaler, dcss_plane->ch_num,
+                              state->scaling_filter);
+
        dcss_scaler_setup(dcss->scaler, dcss_plane->ch_num,
-                         state->fb->format, src_w, src_h,
+                         state->fb->format,
+                         is_rotation_90_or_270 ? src_h : src_w,
+                         is_rotation_90_or_270 ? src_w : src_h,
                          dst_w, dst_h,
                          drm_mode_vrefresh(&crtc_state->mode));
 
@@ -388,6 +398,10 @@ struct dcss_plane *dcss_plane_init(struct drm_device *drm,
        if (ret)
                return ERR_PTR(ret);
 
+       drm_plane_create_scaling_filter_property(&dcss_plane->base,
+                                       BIT(DRM_SCALING_FILTER_DEFAULT) |
+                                       BIT(DRM_SCALING_FILTER_NEAREST_NEIGHBOR));
+
        drm_plane_create_rotation_property(&dcss_plane->base,
                                           DRM_MODE_ROTATE_0,
                                           DRM_MODE_ROTATE_0   |
index cd21905..47852b9 100644 (file)
@@ -77,6 +77,8 @@ struct dcss_scaler_ch {
 
        u32 c_vstart;
        u32 c_hstart;
+
+       bool use_nn_interpolation;
 };
 
 struct dcss_scaler {
@@ -243,6 +245,17 @@ static void dcss_scaler_gaussian_filter(int fc_q, bool use_5_taps,
        }
 }
 
+static void dcss_scaler_nearest_neighbor_filter(bool use_5_taps,
+                                               int coef[][PSC_NUM_TAPS])
+{
+       int i, j;
+
+       for (i = 0; i < PSC_STORED_PHASES; i++)
+               for (j = 0; j < PSC_NUM_TAPS; j++)
+                       coef[i][j] = j == PSC_NUM_TAPS >> 1 ?
+                                               (1 << PSC_COEFF_PRECISION) : 0;
+}
+
 /**
  * dcss_scaler_filter_design() - Compute filter coefficients using
  *                              Gaussian filter.
@@ -253,7 +266,8 @@ static void dcss_scaler_gaussian_filter(int fc_q, bool use_5_taps,
  */
 static void dcss_scaler_filter_design(int src_length, int dst_length,
                                      bool use_5_taps, bool phase0_identity,
-                                     int coef[][PSC_NUM_TAPS])
+                                     int coef[][PSC_NUM_TAPS],
+                                     bool nn_interpolation)
 {
        int fc_q;
 
@@ -263,8 +277,11 @@ static void dcss_scaler_filter_design(int src_length, int dst_length,
        else
                fc_q = div_q(dst_length, src_length * PSC_NUM_PHASES);
 
-       /* compute gaussian filter coefficients */
-       dcss_scaler_gaussian_filter(fc_q, use_5_taps, phase0_identity, coef);
+       if (nn_interpolation)
+               dcss_scaler_nearest_neighbor_filter(use_5_taps, coef);
+       else
+               /* compute gaussian filter coefficients */
+               dcss_scaler_gaussian_filter(fc_q, use_5_taps, phase0_identity, coef);
 }
 
 static void dcss_scaler_write(struct dcss_scaler_ch *ch, u32 val, u32 ofs)
@@ -653,12 +670,14 @@ static void dcss_scaler_yuv_coef_set(struct dcss_scaler_ch *ch,
 
        /* horizontal luma */
        dcss_scaler_filter_design(src_xres, dst_xres, false,
-                                 src_xres == dst_xres, coef);
+                                 src_xres == dst_xres, coef,
+                                 ch->use_nn_interpolation);
        dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
 
        /* vertical luma */
        dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
-                                 src_yres == dst_yres, coef);
+                                 src_yres == dst_yres, coef,
+                                 ch->use_nn_interpolation);
 
        if (program_5_taps)
                dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
@@ -678,14 +697,14 @@ static void dcss_scaler_yuv_coef_set(struct dcss_scaler_ch *ch,
        /* horizontal chroma */
        dcss_scaler_filter_design(src_xres, dst_xres, false,
                                  (src_xres == dst_xres) && (ch->c_hstart == 0),
-                                 coef);
+                                 coef, ch->use_nn_interpolation);
 
        dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HCHR, coef);
 
        /* vertical chroma */
        dcss_scaler_filter_design(src_yres, dst_yres, program_5_taps,
                                  (src_yres == dst_yres) && (ch->c_vstart == 0),
-                                 coef);
+                                 coef, ch->use_nn_interpolation);
        if (program_5_taps)
                dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef);
        else
@@ -700,12 +719,14 @@ static void dcss_scaler_rgb_coef_set(struct dcss_scaler_ch *ch,
 
        /* horizontal RGB */
        dcss_scaler_filter_design(src_xres, dst_xres, false,
-                                 src_xres == dst_xres, coef);
+                                 src_xres == dst_xres, coef,
+                                 ch->use_nn_interpolation);
        dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef);
 
        /* vertical RGB */
        dcss_scaler_filter_design(src_yres, dst_yres, false,
-                                 src_yres == dst_yres, coef);
+                                 src_yres == dst_yres, coef,
+                                 ch->use_nn_interpolation);
        dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef);
 }
 
@@ -751,6 +772,14 @@ static void dcss_scaler_set_rgb10_order(struct dcss_scaler_ch *ch,
        ch->sdata_ctrl |= a2r10g10b10_format << A2R10G10B10_FORMAT_POS;
 }
 
+void dcss_scaler_set_filter(struct dcss_scaler *scl, int ch_num,
+                           enum drm_scaling_filter scaling_filter)
+{
+       struct dcss_scaler_ch *ch = &scl->ch[ch_num];
+
+       ch->use_nn_interpolation = scaling_filter == DRM_SCALING_FILTER_NEAREST_NEIGHBOR;
+}
+
 void dcss_scaler_setup(struct dcss_scaler *scl, int ch_num,
                       const struct drm_format_info *format,
                       int src_xres, int src_yres, int dst_xres, int dst_yres,
index 832e528..de62966 100644 (file)
@@ -225,7 +225,7 @@ struct drm_gem_object *lima_gem_create_object(struct drm_device *dev, size_t siz
 
        mutex_init(&bo->lock);
        INIT_LIST_HEAD(&bo->va);
-
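+       /* Map buffer objects write-combined by default */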
+       bo->base.map_wc = true;
        bo->base.base.funcs = &lima_gem_funcs;
 
        return &bo->base.base;
index b399012..71c689b 100644 (file)
@@ -4,6 +4,7 @@ config DRM_MCDE
        depends on CMA
        depends on ARM || COMPILE_TEST
        depends on OF
+       depends on COMMON_CLK
        select MFD_SYSCON
        select DRM_MIPI_DSI
        select DRM_BRIDGE
index fe28f4e..15d9c89 100644 (file)
@@ -1,3 +1,3 @@
-mcde_drm-y +=  mcde_drv.o mcde_dsi.o mcde_display.o
+mcde_drm-y +=  mcde_drv.o mcde_dsi.o mcde_clk_div.o mcde_display.o
 
 obj-$(CONFIG_DRM_MCDE) += mcde_drm.o
diff --git a/drivers/gpu/drm/mcde/mcde_clk_div.c b/drivers/gpu/drm/mcde/mcde_clk_div.c
new file mode 100644 (file)
index 0000000..038821d
--- /dev/null
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/clk-provider.h>
+#include <linux/regulator/consumer.h>
+
+#include "mcde_drm.h"
+#include "mcde_display_regs.h"
+
+/* The MCDE internal clock dividers for FIFO A and B */
+struct mcde_clk_div {
+       struct clk_hw hw;
+       struct mcde *mcde;
+       u32 cr;
+       u32 cr_div;
+};
+
+static int mcde_clk_div_enable(struct clk_hw *hw)
+{
+       struct mcde_clk_div *cdiv = container_of(hw, struct mcde_clk_div, hw);
+       struct mcde *mcde = cdiv->mcde;
+       u32 val;
+
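+       /* The CRA1/CRB1 registers are shared with the FIFO setup code */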
+       spin_lock(&mcde->fifo_crx1_lock);
+       val = readl(mcde->regs + cdiv->cr);
+       /*
+        * Select the PLL72 (LCD) clock as parent
+        * FIXME: implement other parents.
+        */
+       val &= ~MCDE_CRX1_CLKSEL_MASK;
+       val |= MCDE_CRX1_CLKSEL_CLKPLL72 << MCDE_CRX1_CLKSEL_SHIFT;
+       /* Internal clock */
+       val |= MCDE_CRA1_CLKTYPE_TVXCLKSEL1;
+
+       /* Clear then set the divider */
+       val &= ~(MCDE_CRX1_BCD | MCDE_CRX1_PCD_MASK);
+       val |= cdiv->cr_div;
+
+       writel(val, mcde->regs + cdiv->cr);
+       spin_unlock(&mcde->fifo_crx1_lock);
+
+       return 0;
+}
+
+static int mcde_clk_div_choose_div(struct clk_hw *hw, unsigned long rate,
+                                  unsigned long *prate, bool set_parent)
+{
+       int best_div = 1, div;
+       struct clk_hw *parent = clk_hw_get_parent(hw);
+       unsigned long best_prate = 0;
+       unsigned long best_diff = ~0ul;
+       int max_div = (1 << MCDE_CRX1_PCD_BITS) - 1;
+
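+       /*
+        * Try all divider settings, optionally rounding the parent
+        * rate for each, and pick the combination that lands closest
+        * to the requested rate.
+        */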
+       for (div = 1; div < max_div; div++) {
+               unsigned long this_prate, div_rate, diff;
+
+               if (set_parent)
+                       this_prate = clk_hw_round_rate(parent, rate * div);
+               else
+                       this_prate = *prate;
+               div_rate = DIV_ROUND_UP_ULL(this_prate, div);
+               diff = abs(rate - div_rate);
+
+               if (diff < best_diff) {
+                       best_div = div;
+                       best_diff = diff;
+                       best_prate = this_prate;
+               }
+       }
+
+       *prate = best_prate;
+       return best_div;
+}
+
+static long mcde_clk_div_round_rate(struct clk_hw *hw, unsigned long rate,
+                                    unsigned long *prate)
+{
+       int div = mcde_clk_div_choose_div(hw, rate, prate, true);
+
+       return DIV_ROUND_UP_ULL(*prate, div);
+}
+
+static unsigned long mcde_clk_div_recalc_rate(struct clk_hw *hw,
+                                              unsigned long prate)
+{
+       struct mcde_clk_div *cdiv = container_of(hw, struct mcde_clk_div, hw);
+       struct mcde *mcde = cdiv->mcde;
+       u32 cr;
+       int div;
+
+       /*
+        * If the MCDE is not powered we can't access registers.
+        * It will come up with 0 in the divider register bits, which
+        * means "divide by 2".
+        */
+       if (!regulator_is_enabled(mcde->epod))
+               return DIV_ROUND_UP_ULL(prate, 2);
+
+       cr = readl(mcde->regs + cdiv->cr);
+       if (cr & MCDE_CRX1_BCD)
+               return prate;
+
+       /* 0 in the PCD means "divide by 2", 1 means "divide by 3" etc */
+       div = cr & MCDE_CRX1_PCD_MASK;
+       div += 2;
+
+       return DIV_ROUND_UP_ULL(prate, div);
+}
+
+static int mcde_clk_div_set_rate(struct clk_hw *hw, unsigned long rate,
+                                 unsigned long prate)
+{
+       struct mcde_clk_div *cdiv = container_of(hw, struct mcde_clk_div, hw);
+       int div = mcde_clk_div_choose_div(hw, rate, &prate, false);
+       u32 cr = 0;
+
+       /*
+        * Cache the CR divider bits in our state so that this can be
+        * called before the hardware is even writable.
+        */
+       if (div == 1) {
+               /* Bypass clock divider */
+               cr |= MCDE_CRX1_BCD;
+       } else {
+               div -= 2;
+               cr |= div & MCDE_CRX1_PCD_MASK;
+       }
+       cdiv->cr_div = cr;
+
+       return 0;
+}
+
+static const struct clk_ops mcde_clk_div_ops = {
+       .enable = mcde_clk_div_enable,
+       .recalc_rate = mcde_clk_div_recalc_rate,
+       .round_rate = mcde_clk_div_round_rate,
+       .set_rate = mcde_clk_div_set_rate,
+};
+
+int mcde_init_clock_divider(struct mcde *mcde)
+{
+       struct device *dev = mcde->dev;
+       struct mcde_clk_div *fifoa;
+       struct mcde_clk_div *fifob;
+       const char *parent_name;
+       struct clk_init_data fifoa_init = {
+               .name = "fifoa",
+               .ops = &mcde_clk_div_ops,
+               .parent_names = &parent_name,
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+       };
+       struct clk_init_data fifob_init = {
+               .name = "fifob",
+               .ops = &mcde_clk_div_ops,
+               .parent_names = &parent_name,
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+       };
+       int ret;
+
+       spin_lock_init(&mcde->fifo_crx1_lock);
+       parent_name = __clk_get_name(mcde->lcd_clk);
+
+       /* Allocate 2 clocks */
+       fifoa = devm_kzalloc(dev, sizeof(*fifoa), GFP_KERNEL);
+       if (!fifoa)
+               return -ENOMEM;
+       fifob = devm_kzalloc(dev, sizeof(*fifob), GFP_KERNEL);
+       if (!fifob)
+               return -ENOMEM;
+
+       fifoa->mcde = mcde;
+       fifoa->cr = MCDE_CRA1;
+       fifoa->hw.init = &fifoa_init;
+       ret = devm_clk_hw_register(dev, &fifoa->hw);
+       if (ret) {
+               dev_err(dev, "error registering FIFO A clock divider\n");
+               return ret;
+       }
+       mcde->fifoa_clk = fifoa->hw.clk;
+
+       fifob->mcde = mcde;
+       fifob->cr = MCDE_CRB1;
+       fifob->hw.init = &fifob_init;
+       ret = devm_clk_hw_register(dev, &fifob->hw);
+       if (ret) {
+               dev_err(dev, "error registering FIFO B clock divider\n");
+               return ret;
+       }
+       mcde->fifob_clk = fifob->hw.clk;
+
+       return 0;
+}
index c271e5b..7c2e0b8 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/delay.h>
 #include <linux/dma-buf.h>
 #include <linux/regulator/consumer.h>
+#include <linux/media-bus-format.h>
 
 #include <drm/drm_device.h>
 #include <drm/drm_fb_cma_helper.h>
@@ -16,6 +17,7 @@
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_vblank.h>
 #include <video/mipi_display.h>
 
@@ -57,10 +59,15 @@ enum mcde_overlay {
        MCDE_OVERLAY_5,
 };
 
-enum mcde_dsi_formatter {
+enum mcde_formatter {
        MCDE_DSI_FORMATTER_0 = 0,
        MCDE_DSI_FORMATTER_1,
        MCDE_DSI_FORMATTER_2,
+       MCDE_DSI_FORMATTER_3,
+       MCDE_DSI_FORMATTER_4,
+       MCDE_DSI_FORMATTER_5,
+       MCDE_DPI_FORMATTER_0,
+       MCDE_DPI_FORMATTER_1,
 };
 
 void mcde_display_irq(struct mcde *mcde)
@@ -81,7 +88,7 @@ void mcde_display_irq(struct mcde *mcde)
         *
         * TODO: Currently only one DSI link is supported.
         */
-       if (mcde_dsi_irq(mcde->mdsi)) {
+       if (!mcde->dpi_output && mcde_dsi_irq(mcde->mdsi)) {
                u32 val;
 
                /*
@@ -243,73 +250,70 @@ static int mcde_configure_extsrc(struct mcde *mcde, enum mcde_extsrc src,
        val = 0 << MCDE_EXTSRCXCONF_BUF_ID_SHIFT;
        val |= 1 << MCDE_EXTSRCXCONF_BUF_NB_SHIFT;
        val |= 0 << MCDE_EXTSRCXCONF_PRI_OVLID_SHIFT;
-       /*
-        * MCDE has inverse semantics from DRM on RBG/BGR which is why
-        * all the modes are inversed here.
-        */
+
        switch (format) {
        case DRM_FORMAT_ARGB8888:
                val |= MCDE_EXTSRCXCONF_BPP_ARGB8888 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
-               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_ABGR8888:
                val |= MCDE_EXTSRCXCONF_BPP_ARGB8888 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
+               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_XRGB8888:
                val |= MCDE_EXTSRCXCONF_BPP_XRGB8888 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
-               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_XBGR8888:
                val |= MCDE_EXTSRCXCONF_BPP_XRGB8888 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
+               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_RGB888:
                val |= MCDE_EXTSRCXCONF_BPP_RGB888 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
-               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_BGR888:
                val |= MCDE_EXTSRCXCONF_BPP_RGB888 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
+               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_ARGB4444:
                val |= MCDE_EXTSRCXCONF_BPP_ARGB4444 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
-               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_ABGR4444:
                val |= MCDE_EXTSRCXCONF_BPP_ARGB4444 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
+               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_XRGB4444:
                val |= MCDE_EXTSRCXCONF_BPP_RGB444 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
-               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_XBGR4444:
                val |= MCDE_EXTSRCXCONF_BPP_RGB444 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
+               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_XRGB1555:
                val |= MCDE_EXTSRCXCONF_BPP_IRGB1555 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
-               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_XBGR1555:
                val |= MCDE_EXTSRCXCONF_BPP_IRGB1555 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
+               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_RGB565:
                val |= MCDE_EXTSRCXCONF_BPP_RGB565 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
-               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_BGR565:
                val |= MCDE_EXTSRCXCONF_BPP_RGB565 <<
                        MCDE_EXTSRCXCONF_BPP_SHIFT;
+               val |= MCDE_EXTSRCXCONF_BGR;
                break;
        case DRM_FORMAT_YUV422:
                val |= MCDE_EXTSRCXCONF_BPP_YCBCR422 <<
@@ -556,6 +560,7 @@ static void mcde_configure_channel(struct mcde *mcde, enum mcde_channel ch,
                        << MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_SHIFT;
                break;
        case MCDE_VIDEO_FORMATTER_FLOW:
+       case MCDE_DPI_FORMATTER_FLOW:
                val = MCDE_CHNLXSYNCHMOD_SRC_SYNCH_HARDWARE
                        << MCDE_CHNLXSYNCHMOD_SRC_SYNCH_SHIFT;
                val |= MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_FORMATTER
@@ -564,7 +569,7 @@ static void mcde_configure_channel(struct mcde *mcde, enum mcde_channel ch,
        default:
                dev_err(mcde->dev, "unknown flow mode %d\n",
                        mcde->flow_mode);
-               break;
+               return;
        }
 
        writel(val, mcde->regs + sync);
@@ -594,10 +599,35 @@ static void mcde_configure_channel(struct mcde *mcde, enum mcde_channel ch,
                       mcde->regs + mux);
                break;
        }
+
+       /*
+        * If using DPI configure the sync event.
+        * TODO: this is for LCD only, it does not cover TV out.
+        */
+       if (mcde->dpi_output) {
+               u32 stripwidth;
+
+               stripwidth = 0xF000 / (mode->vdisplay * 4);
+               dev_info(mcde->dev, "stripwidth: %d\n", stripwidth);
+
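+               /*
+                * Request the hardware event and the software interrupt
+                * a stripwidth of pixels before the end of the active
+                * video line.
+                */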
+               val = MCDE_SYNCHCONF_HWREQVEVENT_ACTIVE_VIDEO |
+                       (mode->hdisplay - 1 - stripwidth) << MCDE_SYNCHCONF_HWREQVCNT_SHIFT |
+                       MCDE_SYNCHCONF_SWINTVEVENT_ACTIVE_VIDEO |
+                       (mode->hdisplay - 1 - stripwidth) << MCDE_SYNCHCONF_SWINTVCNT_SHIFT;
+
+               switch (fifo) {
+               case MCDE_FIFO_A:
+                       writel(val, mcde->regs + MCDE_SYNCHCONFA);
+                       break;
+               case MCDE_FIFO_B:
+                       writel(val, mcde->regs + MCDE_SYNCHCONFB);
+                       break;
+               }
+       }
 }
 
 static void mcde_configure_fifo(struct mcde *mcde, enum mcde_fifo fifo,
-                               enum mcde_dsi_formatter fmt,
+                               enum mcde_formatter fmt,
                                int fifo_wtrmrk)
 {
        u32 val;
@@ -618,12 +648,49 @@ static void mcde_configure_fifo(struct mcde *mcde, enum mcde_fifo fifo,
        }
 
        val = fifo_wtrmrk << MCDE_CTRLX_FIFOWTRMRK_SHIFT;
-       /* We only support DSI formatting for now */
-       val |= MCDE_CTRLX_FORMTYPE_DSI <<
-               MCDE_CTRLX_FORMTYPE_SHIFT;
 
-       /* Select the formatter to use for this FIFO */
-       val |= fmt << MCDE_CTRLX_FORMID_SHIFT;
+       /*
+        * Select the formatter to use for this FIFO
+        *
+        * The register definitions imply that different IDs should be used
+        * by the DSI formatters depending on whether they are in VID or
+        * CMD mode, and the manual says they are dedicated but identical.
+        * The vendor code uses them as it sees fit.
+        */
+       switch (fmt) {
+       case MCDE_DSI_FORMATTER_0:
+               val |= MCDE_CTRLX_FORMTYPE_DSI << MCDE_CTRLX_FORMTYPE_SHIFT;
+               val |= MCDE_CTRLX_FORMID_DSI0VID << MCDE_CTRLX_FORMID_SHIFT;
+               break;
+       case MCDE_DSI_FORMATTER_1:
+               val |= MCDE_CTRLX_FORMTYPE_DSI << MCDE_CTRLX_FORMTYPE_SHIFT;
+               val |= MCDE_CTRLX_FORMID_DSI0CMD << MCDE_CTRLX_FORMID_SHIFT;
+               break;
+       case MCDE_DSI_FORMATTER_2:
+               val |= MCDE_CTRLX_FORMTYPE_DSI << MCDE_CTRLX_FORMTYPE_SHIFT;
+               val |= MCDE_CTRLX_FORMID_DSI1VID << MCDE_CTRLX_FORMID_SHIFT;
+               break;
+       case MCDE_DSI_FORMATTER_3:
+               val |= MCDE_CTRLX_FORMTYPE_DSI << MCDE_CTRLX_FORMTYPE_SHIFT;
+               val |= MCDE_CTRLX_FORMID_DSI1CMD << MCDE_CTRLX_FORMID_SHIFT;
+               break;
+       case MCDE_DSI_FORMATTER_4:
+               val |= MCDE_CTRLX_FORMTYPE_DSI << MCDE_CTRLX_FORMTYPE_SHIFT;
+               val |= MCDE_CTRLX_FORMID_DSI2VID << MCDE_CTRLX_FORMID_SHIFT;
+               break;
+       case MCDE_DSI_FORMATTER_5:
+               val |= MCDE_CTRLX_FORMTYPE_DSI << MCDE_CTRLX_FORMTYPE_SHIFT;
+               val |= MCDE_CTRLX_FORMID_DSI2CMD << MCDE_CTRLX_FORMID_SHIFT;
+               break;
+       case MCDE_DPI_FORMATTER_0:
+               val |= MCDE_CTRLX_FORMTYPE_DPITV << MCDE_CTRLX_FORMTYPE_SHIFT;
+               val |= MCDE_CTRLX_FORMID_DPIA << MCDE_CTRLX_FORMID_SHIFT;
+               break;
+       case MCDE_DPI_FORMATTER_1:
+               val |= MCDE_CTRLX_FORMTYPE_DPITV << MCDE_CTRLX_FORMTYPE_SHIFT;
+               val |= MCDE_CTRLX_FORMID_DPIB << MCDE_CTRLX_FORMID_SHIFT;
+               break;
+       }
        writel(val, mcde->regs + ctrl);
 
        /* Blend source with Alpha 0xff on FIFO */
@@ -631,17 +698,54 @@ static void mcde_configure_fifo(struct mcde *mcde, enum mcde_fifo fifo,
                0xff << MCDE_CRX0_ALPHABLEND_SHIFT;
        writel(val, mcde->regs + cr0);
 
-       /* Set-up from mcde_fmtr_dsi.c, fmtr_dsi_enable_video() */
-
-       /* Use the MCDE clock for this FIFO */
-       val = MCDE_CRX1_CLKSEL_MCDECLK << MCDE_CRX1_CLKSEL_SHIFT;
+       spin_lock(&mcde->fifo_crx1_lock);
+       val = readl(mcde->regs + cr1);
+       /*
+        * Set-up from mcde_fmtr_dsi.c, fmtr_dsi_enable_video()
+        * FIXME: a different clock needs to be selected for TV out.
+        */
+       if (mcde->dpi_output) {
+               struct drm_connector *connector = drm_panel_bridge_connector(mcde->bridge);
+               u32 bus_format;
+
+               /* Assume RGB888 24 bit if we have no further info */
+               if (!connector->display_info.num_bus_formats) {
+                       dev_info(mcde->dev, "panel does not specify bus format, assume RGB888\n");
+                       bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+               } else {
+                       bus_format = connector->display_info.bus_formats[0];
+               }
 
-       /* TODO: when adding DPI support add OUTBPP etc here */
+               /*
+                * Set up the CDWIN and OUTBPP for the LCD
+                *
+                * FIXME: fill this in if you know the correspondence between the MIPI
+                * DPI specification and the media bus formats.
+                */
+               val &= ~MCDE_CRX1_CDWIN_MASK;
+               val &= ~MCDE_CRX1_OUTBPP_MASK;
+               switch (bus_format) {
+               case MEDIA_BUS_FMT_RGB888_1X24:
+                       val |= MCDE_CRX1_CDWIN_24BPP << MCDE_CRX1_CDWIN_SHIFT;
+                       val |= MCDE_CRX1_OUTBPP_24BPP << MCDE_CRX1_OUTBPP_SHIFT;
+                       break;
+               default:
+                       dev_err(mcde->dev, "unknown bus format, assume RGB888\n");
+                       val |= MCDE_CRX1_CDWIN_24BPP << MCDE_CRX1_CDWIN_SHIFT;
+                       val |= MCDE_CRX1_OUTBPP_24BPP << MCDE_CRX1_OUTBPP_SHIFT;
+                       break;
+               }
+       } else {
+               /* Use the MCDE clock for DSI */
+               val &= ~MCDE_CRX1_CLKSEL_MASK;
+               val |= MCDE_CRX1_CLKSEL_MCDECLK << MCDE_CRX1_CLKSEL_SHIFT;
+       }
        writel(val, mcde->regs + cr1);
+       spin_unlock(&mcde->fifo_crx1_lock);
 };
 
 static void mcde_configure_dsi_formatter(struct mcde *mcde,
-                                        enum mcde_dsi_formatter fmt,
+                                        enum mcde_formatter fmt,
                                         u32 formatter_frame,
                                         int pkt_size)
 {
@@ -681,6 +785,9 @@ static void mcde_configure_dsi_formatter(struct mcde *mcde,
                delay0 = MCDE_DSIVID2DELAY0;
                delay1 = MCDE_DSIVID2DELAY1;
                break;
+       default:
+               dev_err(mcde->dev, "tried to configure a non-DSI formatter as DSI\n");
+               return;
        }
 
        /*
@@ -700,7 +807,9 @@ static void mcde_configure_dsi_formatter(struct mcde *mcde,
                        MCDE_DSICONF0_PACKING_SHIFT;
                break;
        case MIPI_DSI_FMT_RGB666_PACKED:
-               val |= MCDE_DSICONF0_PACKING_RGB666_PACKED <<
+               dev_err(mcde->dev,
+                       "we cannot handle the packed RGB666 format\n");
+               val |= MCDE_DSICONF0_PACKING_RGB666 <<
                        MCDE_DSICONF0_PACKING_SHIFT;
                break;
        case MIPI_DSI_FMT_RGB565:
@@ -860,73 +969,140 @@ static int mcde_dsi_get_pkt_div(int ppl, int fifo_size)
        return 1;
 }
 
-static void mcde_display_enable(struct drm_simple_display_pipe *pipe,
-                               struct drm_crtc_state *cstate,
-                               struct drm_plane_state *plane_state)
+static void mcde_setup_dpi(struct mcde *mcde, const struct drm_display_mode *mode,
+                          int *fifo_wtrmrk_lvl)
 {
-       struct drm_crtc *crtc = &pipe->crtc;
-       struct drm_plane *plane = &pipe->plane;
-       struct drm_device *drm = crtc->dev;
-       struct mcde *mcde = to_mcde(drm);
-       const struct drm_display_mode *mode = &cstate->mode;
-       struct drm_framebuffer *fb = plane->state->fb;
-       u32 format = fb->format->format;
-       u32 formatter_ppl = mode->hdisplay; /* pixels per line */
-       u32 formatter_lpf = mode->vdisplay; /* lines per frame */
-       int pkt_size, fifo_wtrmrk;
-       int cpp = fb->format->cpp[0];
-       int formatter_cpp;
-       struct drm_format_name_buf tmp;
-       u32 formatter_frame;
-       u32 pkt_div;
+       struct drm_connector *connector = drm_panel_bridge_connector(mcde->bridge);
+       u32 hsw, hfp, hbp;
+       u32 vsw, vfp, vbp;
        u32 val;
-       int ret;
 
-       /* This powers up the entire MCDE block and the DSI hardware */
-       ret = regulator_enable(mcde->epod);
-       if (ret) {
-               dev_err(drm->dev, "can't re-enable EPOD regulator\n");
-               return;
-       }
+       /* FIXME: we only support LCD, implement TV out */
+       hsw = mode->hsync_end - mode->hsync_start;
+       hfp = mode->hsync_start - mode->hdisplay;
+       hbp = mode->htotal - mode->hsync_end;
+       vsw = mode->vsync_end - mode->vsync_start;
+       vfp = mode->vsync_start - mode->vdisplay;
+       vbp = mode->vtotal - mode->vsync_end;
 
-       dev_info(drm->dev, "enable MCDE, %d x %d format %s\n",
-                mode->hdisplay, mode->vdisplay,
-                drm_get_format_name(format, &tmp));
-       if (!mcde->mdsi) {
-               /* TODO: deal with this for non-DSI output */
-               dev_err(drm->dev, "no DSI master attached!\n");
-               return;
-       }
+       dev_info(mcde->dev, "output on DPI LCD from channel A\n");
+       /* Display actual values */
+       dev_info(mcde->dev, "HSW: %d, HFP: %d, HBP: %d, VSW: %d, VFP: %d, VBP: %d\n",
+                hsw, hfp, hbp, vsw, vfp, vbp);
+
+       /*
+        * The pixel fetcher is 128 64-bit words deep = 1024 bytes.
+        * One overlay of 32bpp (4 cpp) assumed, fetch 160 pixels.
+        * 160 * 4 = 640 bytes.
+        */
+       *fifo_wtrmrk_lvl = 640;
 
        /* Set up the main control, watermark level at 7 */
        val = 7 << MCDE_CONF0_IFIFOCTRLWTRMRKLVL_SHIFT;
-       /* 24 bits DPI: connect LSB Ch B to D[0:7] */
-       val |= 3 << MCDE_CONF0_OUTMUX0_SHIFT;
-       /* TV out: connect LSB Ch B to D[8:15] */
-       val |= 3 << MCDE_CONF0_OUTMUX1_SHIFT;
+
+       /*
+        * This sets up the internal silicon muxing of the DPI
+        * lines. This is how the silicon connects out to the
+        * external pins, then the pins need to be further
+        * configured into "alternate functions" using pin control
+        * to actually get the signals out.
+        *
+        * FIXME: this is hardcoded to the only setting found in
+        * the wild. If we need to use different settings for
+        * different DPI displays, make this parameterizable from
+        * the device tree.
+        */
+       /* 24 bits DPI: connect Ch A LSB to D[0:7] */
+       val |= 0 << MCDE_CONF0_OUTMUX0_SHIFT;
+       /* 24 bits DPI: connect Ch A MID to D[8:15] */
+       val |= 1 << MCDE_CONF0_OUTMUX1_SHIFT;
        /* Don't care about this muxing */
        val |= 0 << MCDE_CONF0_OUTMUX2_SHIFT;
-       /* 24 bits DPI: connect MID Ch B to D[24:31] */
-       val |= 4 << MCDE_CONF0_OUTMUX3_SHIFT;
-       /* 5: 24 bits DPI: connect MSB Ch B to D[32:39] */
-       val |= 5 << MCDE_CONF0_OUTMUX4_SHIFT;
-       /* Syncmux bits zero: DPI channel A and B on output pins A and B resp */
+       /* Don't care about this muxing */
+       val |= 0 << MCDE_CONF0_OUTMUX3_SHIFT;
+       /* 24 bits DPI: connect Ch A MSB to D[32:39] */
+       val |= 2 << MCDE_CONF0_OUTMUX4_SHIFT;
+       /* Syncmux bits zero: DPI channel A */
        writel(val, mcde->regs + MCDE_CONF0);
 
-       /* Clear any pending interrupts */
-       mcde_display_disable_irqs(mcde);
-       writel(0, mcde->regs + MCDE_IMSCERR);
-       writel(0xFFFFFFFF, mcde->regs + MCDE_RISERR);
+       /* This hammers us into LCD mode */
+       writel(0, mcde->regs + MCDE_TVCRA);
+
+       /* Front porch and sync width */
+       val = (vsw << MCDE_TVBL1_BEL1_SHIFT);
+       val |= (vfp << MCDE_TVBL1_BSL1_SHIFT);
+       writel(val, mcde->regs + MCDE_TVBL1A);
+       /* The vendor driver sets the same value into TVBL2A */
+       writel(val, mcde->regs + MCDE_TVBL2A);
+
+       /* Vertical back porch */
+       val = (vbp << MCDE_TVDVO_DVO1_SHIFT);
+       /* The vendor driver sets the same value into the DVO2 field */
+       val |= (vbp << MCDE_TVDVO_DVO2_SHIFT);
+       writel(val, mcde->regs + MCDE_TVDVOA);
+
+       /* Horizontal back porch: as 0 means 1 cycle, we need to subtract 1 */
+       writel((hbp - 1), mcde->regs + MCDE_TVTIM1A);
+
+       /* Horizontal sync width and horizontal front porch, 0 = 1 cycle */
+       val = ((hsw - 1) << MCDE_TVLBALW_LBW_SHIFT);
+       val |= ((hfp - 1) << MCDE_TVLBALW_ALW_SHIFT);
+       writel(val, mcde->regs + MCDE_TVLBALWA);
+
+       /* Blank some TV registers we don't use */
+       writel(0, mcde->regs + MCDE_TVISLA);
+       writel(0, mcde->regs + MCDE_TVBLUA);
+
+       /* Set up sync inversion etc */
+       val = 0;
+       if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+               val |= MCDE_LCDTIM1B_IHS;
+       if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+               val |= MCDE_LCDTIM1B_IVS;
+       if (connector->display_info.bus_flags & DRM_BUS_FLAG_DE_LOW)
+               val |= MCDE_LCDTIM1B_IOE;
+       if (connector->display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)
+               val |= MCDE_LCDTIM1B_IPC;
+       writel(val, mcde->regs + MCDE_LCDTIM1A);
+}
 
-       dev_info(drm->dev, "output in %s mode, format %dbpp\n",
+static void mcde_setup_dsi(struct mcde *mcde, const struct drm_display_mode *mode,
+                          int cpp, int *fifo_wtrmrk_lvl, int *dsi_formatter_frame,
+                          int *dsi_pkt_size)
+{
+       u32 formatter_ppl = mode->hdisplay; /* pixels per line */
+       u32 formatter_lpf = mode->vdisplay; /* lines per frame */
+       int formatter_frame;
+       int formatter_cpp;
+       int fifo_wtrmrk;
+       u32 pkt_div;
+       int pkt_size;
+       u32 val;
+
+       dev_info(mcde->dev, "output in %s mode, format %dbpp\n",
                 (mcde->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO) ?
                 "VIDEO" : "CMD",
                 mipi_dsi_pixel_format_to_bpp(mcde->mdsi->format));
        formatter_cpp =
                mipi_dsi_pixel_format_to_bpp(mcde->mdsi->format) / 8;
-       dev_info(drm->dev, "overlay CPP %d bytes, DSI CPP %d bytes\n",
-                cpp,
-                formatter_cpp);
+       dev_info(mcde->dev, "Overlay CPP: %d bytes, DSI formatter CPP %d bytes\n",
+                cpp, formatter_cpp);
+
+       /* Set up the main control, watermark level at 7 */
+       val = 7 << MCDE_CONF0_IFIFOCTRLWTRMRKLVL_SHIFT;
+
+       /*
+        * This is the internal silicon muxing of the DPI
+        * (parallel display) lines. Since we are not using
+        * this at all (we are using DSI), these are just
+        * dummy values from the vendor tree.
+        */
+       val |= 3 << MCDE_CONF0_OUTMUX0_SHIFT;
+       val |= 3 << MCDE_CONF0_OUTMUX1_SHIFT;
+       val |= 0 << MCDE_CONF0_OUTMUX2_SHIFT;
+       val |= 4 << MCDE_CONF0_OUTMUX3_SHIFT;
+       val |= 5 << MCDE_CONF0_OUTMUX4_SHIFT;
+       writel(val, mcde->regs + MCDE_CONF0);
 
        /* Calculations from mcde_fmtr_dsi.c, fmtr_dsi_enable_video() */
 
@@ -948,9 +1124,9 @@ static void mcde_display_enable(struct drm_simple_display_pipe *pipe,
                /* The FIFO is 640 entries deep on this v3 hardware */
                pkt_div = mcde_dsi_get_pkt_div(mode->hdisplay, 640);
        }
-       dev_dbg(drm->dev, "FIFO watermark after flooring: %d bytes\n",
+       dev_dbg(mcde->dev, "FIFO watermark after flooring: %d bytes\n",
                fifo_wtrmrk);
-       dev_dbg(drm->dev, "Packet divisor: %d bytes\n", pkt_div);
+       dev_dbg(mcde->dev, "Packet divisor: %d bytes\n", pkt_div);
 
        /* NOTE: pkt_div is 1 for video mode */
        pkt_size = (formatter_ppl * formatter_cpp) / pkt_div;
@@ -958,16 +1134,64 @@ static void mcde_display_enable(struct drm_simple_display_pipe *pipe,
        if (!(mcde->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO))
                pkt_size++;
 
-       dev_dbg(drm->dev, "DSI packet size: %d * %d bytes per line\n",
+       dev_dbg(mcde->dev, "DSI packet size: %d * %d bytes per line\n",
                pkt_size, pkt_div);
-       dev_dbg(drm->dev, "Overlay frame size: %u bytes\n",
+       dev_dbg(mcde->dev, "Overlay frame size: %u bytes\n",
                mode->hdisplay * mode->vdisplay * cpp);
-       mcde->stride = mode->hdisplay * cpp;
-       dev_dbg(drm->dev, "Overlay line stride: %u bytes\n",
-               mcde->stride);
        /* NOTE: pkt_div is 1 for video mode */
        formatter_frame = pkt_size * pkt_div * formatter_lpf;
-       dev_dbg(drm->dev, "Formatter frame size: %u bytes\n", formatter_frame);
+       dev_dbg(mcde->dev, "Formatter frame size: %u bytes\n", formatter_frame);
+
+       *fifo_wtrmrk_lvl = fifo_wtrmrk;
+       *dsi_pkt_size = pkt_size;
+       *dsi_formatter_frame = formatter_frame;
+}
+
+static void mcde_display_enable(struct drm_simple_display_pipe *pipe,
+                               struct drm_crtc_state *cstate,
+                               struct drm_plane_state *plane_state)
+{
+       struct drm_crtc *crtc = &pipe->crtc;
+       struct drm_plane *plane = &pipe->plane;
+       struct drm_device *drm = crtc->dev;
+       struct mcde *mcde = to_mcde(drm);
+       const struct drm_display_mode *mode = &cstate->mode;
+       struct drm_framebuffer *fb = plane->state->fb;
+       u32 format = fb->format->format;
+       int dsi_pkt_size;
+       int fifo_wtrmrk;
+       int cpp = fb->format->cpp[0];
+       struct drm_format_name_buf tmp;
+       int dsi_formatter_frame;
+       u32 val;
+       int ret;
+
+       /* This powers up the entire MCDE block and the DSI hardware */
+       ret = regulator_enable(mcde->epod);
+       if (ret) {
+               dev_err(drm->dev, "can't re-enable EPOD regulator\n");
+               return;
+       }
+
+       dev_info(drm->dev, "enable MCDE, %d x %d format %s\n",
+                mode->hdisplay, mode->vdisplay,
+                drm_get_format_name(format, &tmp));
+
+       /* Clear any pending interrupts */
+       mcde_display_disable_irqs(mcde);
+       writel(0, mcde->regs + MCDE_IMSCERR);
+       writel(0xFFFFFFFF, mcde->regs + MCDE_RISERR);
+
+       if (mcde->dpi_output)
+               mcde_setup_dpi(mcde, mode, &fifo_wtrmrk);
+       else
+               mcde_setup_dsi(mcde, mode, cpp, &fifo_wtrmrk,
+                              &dsi_formatter_frame, &dsi_pkt_size);
+
+       mcde->stride = mode->hdisplay * cpp;
+       dev_dbg(drm->dev, "Overlay line stride: %u bytes\n",
+                mcde->stride);
 
        /* Drain the FIFO A + channel 0 pipe so we have a clean slate */
        mcde_drain_pipe(mcde, MCDE_FIFO_A, MCDE_CHANNEL_0);
@@ -995,29 +1219,47 @@ static void mcde_display_enable(struct drm_simple_display_pipe *pipe,
         */
        mcde_configure_channel(mcde, MCDE_CHANNEL_0, MCDE_FIFO_A, mode);
 
-       /* Configure FIFO A to use DSI formatter 0 */
-       mcde_configure_fifo(mcde, MCDE_FIFO_A, MCDE_DSI_FORMATTER_0,
-                           fifo_wtrmrk);
+       if (mcde->dpi_output) {
+               unsigned long lcd_freq;
+
+               /* Configure FIFO A to use DPI formatter 0 */
+               mcde_configure_fifo(mcde, MCDE_FIFO_A, MCDE_DPI_FORMATTER_0,
+                                   fifo_wtrmrk);
+
+               /* Set up and enable the LCD clock */
+               lcd_freq = clk_round_rate(mcde->fifoa_clk, mode->clock * 1000);
+               ret = clk_set_rate(mcde->fifoa_clk, lcd_freq);
+               if (ret)
+                       dev_err(mcde->dev, "failed to set LCD clock rate %lu Hz\n",
+                               lcd_freq);
+               ret = clk_prepare_enable(mcde->fifoa_clk);
+               if (ret) {
+                       dev_err(mcde->dev, "failed to enable FIFO A DPI clock\n");
+                       return;
+               }
+               dev_info(mcde->dev, "LCD FIFO A clk rate %lu Hz\n",
+                        clk_get_rate(mcde->fifoa_clk));
+       } else {
+               /* Configure FIFO A to use DSI formatter 0 */
+               mcde_configure_fifo(mcde, MCDE_FIFO_A, MCDE_DSI_FORMATTER_0,
+                                   fifo_wtrmrk);
 
-       /*
-        * This brings up the DSI bridge which is tightly connected
-        * to the MCDE DSI formatter.
-        *
-        * FIXME: if we want to use another formatter, such as DPI,
-        * we need to be more elaborate here and select the appropriate
-        * bridge.
-        */
-       mcde_dsi_enable(mcde->bridge);
+               /*
+                * This brings up the DSI bridge which is tightly connected
+                * to the MCDE DSI formatter.
+                */
+               mcde_dsi_enable(mcde->bridge);
 
-       /* Configure the DSI formatter 0 for the DSI panel output */
-       mcde_configure_dsi_formatter(mcde, MCDE_DSI_FORMATTER_0,
-                                    formatter_frame, pkt_size);
+               /* Configure the DSI formatter 0 for the DSI panel output */
+               mcde_configure_dsi_formatter(mcde, MCDE_DSI_FORMATTER_0,
+                                            dsi_formatter_frame, dsi_pkt_size);
+       }
 
        switch (mcde->flow_mode) {
        case MCDE_COMMAND_TE_FLOW:
        case MCDE_COMMAND_BTA_TE_FLOW:
        case MCDE_VIDEO_TE_FLOW:
-               /* We are using TE in some comination */
+               /* We are using TE in some combination */
                if (mode->flags & DRM_MODE_FLAG_NVSYNC)
                        val = MCDE_VSCRC_VSPOL;
                else
@@ -1069,8 +1311,12 @@ static void mcde_display_disable(struct drm_simple_display_pipe *pipe)
        /* Disable FIFO A flow */
        mcde_disable_fifo(mcde, MCDE_FIFO_A, true);
 
-       /* This disables the DSI bridge */
-       mcde_dsi_disable(mcde->bridge);
+       if (mcde->dpi_output) {
+               clk_disable_unprepare(mcde->fifoa_clk);
+       } else {
+               /* This disables the DSI bridge */
+               mcde_dsi_disable(mcde->bridge);
+       }
 
        event = crtc->state->event;
        if (event) {
@@ -1261,6 +1507,10 @@ int mcde_display_init(struct drm_device *drm)
                DRM_FORMAT_YUV422,
        };
 
+       ret = mcde_init_clock_divider(mcde);
+       if (ret)
+               return ret;
+
        ret = drm_simple_display_pipe_init(drm, &mcde->pipe,
                                           &mcde_display_funcs,
                                           formats, ARRAY_SIZE(formats),
index d3ac7ef..2ad78c5 100644 (file)
 #define MCDE_OVLXCOMP_Z_SHIFT 27
 #define MCDE_OVLXCOMP_Z_MASK 0x78000000
 
+/* DPI/TV configuration registers, channel A and B */
+#define MCDE_TVCRA 0x00000838
+#define MCDE_TVCRB 0x00000A38
+#define MCDE_TVCR_MOD_TV BIT(0) /* 0 = LCD mode */
+#define MCDE_TVCR_INTEREN BIT(1)
+#define MCDE_TVCR_IFIELD BIT(2)
+#define MCDE_TVCR_TVMODE_SDTV_656P (0 << 3)
+#define MCDE_TVCR_TVMODE_SDTV_656P_LE (3 << 3)
+#define MCDE_TVCR_TVMODE_SDTV_656P_BE (4 << 3)
+#define MCDE_TVCR_SDTVMODE_Y0CBY1CR (0 << 6)
+#define MCDE_TVCR_SDTVMODE_CBY0CRY1 (1 << 6)
+#define MCDE_TVCR_AVRGEN BIT(8)
+#define MCDE_TVCR_CKINV BIT(9)
+
+/* TV blanking control register 1, channel A and B */
+#define MCDE_TVBL1A 0x0000083C
+#define MCDE_TVBL1B 0x00000A3C
+#define MCDE_TVBL1_BEL1_SHIFT 0 /* VFP vertical front porch 11 bits */
+#define MCDE_TVBL1_BSL1_SHIFT 16 /* VSW vertical sync pulse width 11 bits */
+
+/* Pixel processing TV start line, channel A and B */
+#define MCDE_TVISLA 0x00000840
+#define MCDE_TVISLB 0x00000A40
+#define MCDE_TVISL_FSL1_SHIFT 0 /* Field 1 identification start line 11 bits */
+#define MCDE_TVISL_FSL2_SHIFT 16 /* Field 2 identification start line 11 bits */
+
+/* Pixel processing TV DVO offset */
+#define MCDE_TVDVOA 0x00000844
+#define MCDE_TVDVOB 0x00000A44
+#define MCDE_TVDVO_DVO1_SHIFT 0 /* VBP vertical back porch 0 = 0 */
+#define MCDE_TVDVO_DVO2_SHIFT 16
+
+/*
+ * Pixel processing TV Timing 1
+ * HBP horizontal back porch 11 bits horizontal offset
+ * 0 = 1 pixel HBP, 255 = 256 pixels, so program the actual value minus 1
+ */
+#define MCDE_TVTIM1A 0x0000084C
+#define MCDE_TVTIM1B 0x00000A4C
+
+/* Pixel processing TV LBALW */
+/* 0 = 1 clock cycle, 255 = 256 clock cycles */
+#define MCDE_TVLBALWA 0x00000850
+#define MCDE_TVLBALWB 0x00000A50
+#define MCDE_TVLBALW_LBW_SHIFT 0 /* HSW horizontal sync width, line blanking width 11 bits */
+#define MCDE_TVLBALW_ALW_SHIFT 16 /* HFP horizontal front porch, active line width 11 bits */
+
+/* TV blanking control register 1, channel A and B */
+#define MCDE_TVBL2A 0x00000854
+#define MCDE_TVBL2B 0x00000A54
+#define MCDE_TVBL2_BEL2_SHIFT 0 /* Field 2 blanking end line 11 bits */
+#define MCDE_TVBL2_BSL2_SHIFT 16 /* Field 2 blanking start line 11 bits */
+
+/* Pixel processing TV background */
+#define MCDE_TVBLUA 0x00000858
+#define MCDE_TVBLUB 0x00000A58
+#define MCDE_TVBLU_TVBLU_SHIFT 0 /* 8 bits luminance */
+#define MCDE_TVBLU_TVBCB_SHIFT 8 /* 8 bits Cb chrominance */
+#define MCDE_TVBLU_TVBCR_SHIFT 16 /* 8 bits Cr chrominance */
+
+/* Pixel processing LCD timing 1 */
+#define MCDE_LCDTIM1A 0x00000860
+#define MCDE_LCDTIM1B 0x00000A60
+/* inverted vertical sync pulse for HRTFT, 0 = active low, 1 = active high */
+#define MCDE_LCDTIM1B_IVP BIT(19)
+/* inverted vertical sync, 0 = active high (the norm), 1 = active low */
+#define MCDE_LCDTIM1B_IVS BIT(20)
+/* inverted horizontal sync, 0 = active high (the norm), 1 = active low */
+#define MCDE_LCDTIM1B_IHS BIT(21)
+/* inverted panel clock 0 = rising edge data out, 1 = falling edge data out */
+#define MCDE_LCDTIM1B_IPC BIT(22)
+/* invert output enable 0 = active high, 1 = active low */
+#define MCDE_LCDTIM1B_IOE BIT(23)
+
 #define MCDE_CRC 0x00000C00
 #define MCDE_CRC_C1EN BIT(2)
 #define MCDE_CRC_C2EN BIT(3)
 #define MCDE_CRB1 0x00000A04
 #define MCDE_CRX1_PCD_SHIFT 0
 #define MCDE_CRX1_PCD_MASK 0x000003FF
+#define MCDE_CRX1_PCD_BITS 10
 #define MCDE_CRX1_CLKSEL_SHIFT 10
 #define MCDE_CRX1_CLKSEL_MASK 0x00001C00
 #define MCDE_CRX1_CLKSEL_CLKPLL72 0
 #define MCDE_ROTACONF 0x0000087C
 #define MCDE_ROTBCONF 0x00000A7C
 
+/* Synchronization event configuration */
 #define MCDE_SYNCHCONFA 0x00000880
 #define MCDE_SYNCHCONFB 0x00000A80
+#define MCDE_SYNCHCONF_HWREQVEVENT_SHIFT 0
+#define MCDE_SYNCHCONF_HWREQVEVENT_VSYNC (0 << 0)
+#define MCDE_SYNCHCONF_HWREQVEVENT_BACK_PORCH (1 << 0)
+#define MCDE_SYNCHCONF_HWREQVEVENT_ACTIVE_VIDEO (2 << 0)
+#define MCDE_SYNCHCONF_HWREQVEVENT_FRONT_PORCH (3 << 0)
+#define MCDE_SYNCHCONF_HWREQVCNT_SHIFT 2 /* 14 bits */
+#define MCDE_SYNCHCONF_SWINTVEVENT_VSYNC (0 << 16)
+#define MCDE_SYNCHCONF_SWINTVEVENT_BACK_PORCH (1 << 16)
+#define MCDE_SYNCHCONF_SWINTVEVENT_ACTIVE_VIDEO (2 << 16)
+#define MCDE_SYNCHCONF_SWINTVEVENT_FRONT_PORCH (3 << 16)
+#define MCDE_SYNCHCONF_SWINTVCNT_SHIFT 18 /* 14 bits */
 
 /* Channel A+B control registers */
 #define MCDE_CTRLA 0x00000884
 #define MCDE_DSICONF0_PACKING_MASK 0x00700000
 #define MCDE_DSICONF0_PACKING_RGB565 0
 #define MCDE_DSICONF0_PACKING_RGB666 1
-#define MCDE_DSICONF0_PACKING_RGB666_PACKED 2
-#define MCDE_DSICONF0_PACKING_RGB888 3
+#define MCDE_DSICONF0_PACKING_RGB888 2
+#define MCDE_DSICONF0_PACKING_BGR888 3
 #define MCDE_DSICONF0_PACKING_HDTV 4
 
 #define MCDE_DSIVID0FRAME 0x00000E04
index 8253e2f..ecb70b4 100644 (file)
@@ -62,6 +62,8 @@ enum mcde_flow_mode {
        MCDE_VIDEO_TE_FLOW,
        /* Video mode with the formatter itself as sync source */
        MCDE_VIDEO_FORMATTER_FLOW,
+       /* DPI video with the formatter itself as sync source */
+       MCDE_DPI_FORMATTER_FLOW,
 };
 
 struct mcde {
@@ -72,6 +74,7 @@ struct mcde {
        struct drm_connector *connector;
        struct drm_simple_display_pipe pipe;
        struct mipi_dsi_device *mdsi;
+       bool dpi_output;
        s16 stride;
        enum mcde_flow_mode flow_mode;
        unsigned int flow_active;
@@ -82,6 +85,11 @@ struct mcde {
        struct clk *mcde_clk;
        struct clk *lcd_clk;
        struct clk *hdmi_clk;
+       /* Handles to the clock dividers for FIFO A and B */
+       struct clk *fifoa_clk;
+       struct clk *fifob_clk;
+       /* Locks the MCDE FIFO control register A and B */
+       spinlock_t fifo_crx1_lock;
 
        struct regulator *epod;
        struct regulator *vana;
@@ -105,4 +113,6 @@ void mcde_display_irq(struct mcde *mcde);
 void mcde_display_disable_irqs(struct mcde *mcde);
 int mcde_display_init(struct drm_device *drm);
 
+int mcde_init_clock_divider(struct mcde *mcde);
+
 #endif /* _MCDE_DRM_H_ */
index 9d25181..e60566a 100644 (file)
  * The hardware has four display pipes, and the layout is a little
  * bit like this::
  *
- *   Memory     -> Overlay -> Channel -> FIFO -> 5 formatters -> DSI/DPI
- *   External      0..5       0..3       A,B,    3 x DSI         bridge
+ *   Memory     -> Overlay -> Channel -> FIFO -> 8 formatters -> DSI/DPI
+ *   External      0..5       0..3       A,B,    6 x DSI         bridge
  *   source 0..9                         C0,C1   2 x DPI
  *
  * FIFOs A and B are for LCD and HDMI while FIFO C0/C1 are for
  * panels with embedded buffer.
- * 3 of the formatters are for DSI.
+ * 6 of the formatters are for DSI, in 3 VID/CMD pairs.
  * 2 of the formatters are for DPI.
  *
  * Behind the formatters are the DSI or DPI ports that route to
@@ -130,9 +130,37 @@ static int mcde_modeset_init(struct drm_device *drm)
        struct mcde *mcde = to_mcde(drm);
        int ret;
 
+       /*
+        * If no other bridge was found, check if we have a DPI panel or
+        * any other bridge connected directly to the MCDE DPI output.
+        * If a DSI bridge is found, DSI will take precedence.
+        *
+        * TODO: more elaborate bridge selection if we have more than one
+        * thing attached to the system.
+        */
        if (!mcde->bridge) {
-               dev_err(drm->dev, "no display output bridge yet\n");
-               return -EPROBE_DEFER;
+               struct drm_panel *panel;
+               struct drm_bridge *bridge;
+
+               ret = drm_of_find_panel_or_bridge(drm->dev->of_node,
+                                                 0, 0, &panel, &bridge);
+               if (ret) {
+                       dev_err(drm->dev,
+                               "Could not locate any output bridge or panel\n");
+                       return ret;
+               }
+               if (panel) {
+                       bridge = drm_panel_bridge_add_typed(panel,
+                                       DRM_MODE_CONNECTOR_DPI);
+                       if (IS_ERR(bridge)) {
+                               dev_err(drm->dev,
+                                       "Could not connect panel bridge\n");
+                               return PTR_ERR(bridge);
+                       }
+               }
+               mcde->dpi_output = true;
+               mcde->bridge = bridge;
+               mcde->flow_mode = MCDE_DPI_FORMATTER_FLOW;
        }
 
        mode_config = &drm->mode_config;
@@ -156,13 +184,7 @@ static int mcde_modeset_init(struct drm_device *drm)
                return ret;
        }
 
-       /*
-        * Attach the DSI bridge
-        *
-        * TODO: when adding support for the DPI bridge or several DSI bridges,
-        * we selectively connect the bridge(s) here instead of this simple
-        * attachment.
-        */
+       /* Attach the bridge. */
        ret = drm_simple_display_pipe_attach_bridge(&mcde->pipe,
                                                    mcde->bridge);
        if (ret) {
index 7f8eea4..aad75a2 100644 (file)
@@ -145,8 +145,6 @@ struct meson_dw_hdmi {
        struct reset_control *hdmitx_apb;
        struct reset_control *hdmitx_ctrl;
        struct reset_control *hdmitx_phy;
-       struct clk *hdmi_pclk;
-       struct clk *venci_clk;
        struct regulator *hdmi_supply;
        u32 irq_stat;
        struct dw_hdmi *hdmi;
@@ -946,6 +944,29 @@ static void meson_disable_regulator(void *data)
        regulator_disable(data);
 }
 
+static void meson_disable_clk(void *data)
+{
+       clk_disable_unprepare(data);
+}
+
+static int meson_enable_clk(struct device *dev, char *name)
+{
+       struct clk *clk;
+       int ret;
+
+       clk = devm_clk_get(dev, name);
+       if (IS_ERR(clk)) {
+               dev_err(dev, "Unable to get %s pclk\n", name);
+               return PTR_ERR(clk);
+       }
+
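+       /*
+        * Enable the clock and register an undo action so it is
+        * disabled and unprepared again when the device is unbound.
+        */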
+       ret = clk_prepare_enable(clk);
+       if (!ret)
+               ret = devm_add_action_or_reset(dev, meson_disable_clk, clk);
+
+       return ret;
+}
+
 static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
                                void *data)
 {
@@ -1026,19 +1047,17 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
        if (IS_ERR(meson_dw_hdmi->hdmitx))
                return PTR_ERR(meson_dw_hdmi->hdmitx);
 
-       meson_dw_hdmi->hdmi_pclk = devm_clk_get(dev, "isfr");
-       if (IS_ERR(meson_dw_hdmi->hdmi_pclk)) {
-               dev_err(dev, "Unable to get HDMI pclk\n");
-               return PTR_ERR(meson_dw_hdmi->hdmi_pclk);
-       }
-       clk_prepare_enable(meson_dw_hdmi->hdmi_pclk);
+       ret = meson_enable_clk(dev, "isfr");
+       if (ret)
+               return ret;
 
-       meson_dw_hdmi->venci_clk = devm_clk_get(dev, "venci");
-       if (IS_ERR(meson_dw_hdmi->venci_clk)) {
-               dev_err(dev, "Unable to get venci clk\n");
-               return PTR_ERR(meson_dw_hdmi->venci_clk);
-       }
-       clk_prepare_enable(meson_dw_hdmi->venci_clk);
+       ret = meson_enable_clk(dev, "iahb");
+       if (ret)
+               return ret;
+
+       ret = meson_enable_clk(dev, "venci");
+       if (ret)
+               return ret;
 
        dw_plat_data->regm = devm_regmap_init(dev, NULL, meson_dw_hdmi,
                                              &meson_dw_hdmi_regmap_config);
@@ -1071,6 +1090,8 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
 
        encoder->possible_crtcs = BIT(0);
 
+       meson_dw_hdmi_init(meson_dw_hdmi);
+
        DRM_DEBUG_DRIVER("encoder initialized\n");
 
        /* Bridge / Connector */
@@ -1095,8 +1116,6 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
        if (IS_ERR(meson_dw_hdmi->hdmi))
                return PTR_ERR(meson_dw_hdmi->hdmi);
 
-       meson_dw_hdmi_init(meson_dw_hdmi);
-
        next_bridge = of_drm_find_bridge(pdev->dev.of_node);
        if (next_bridge)
                drm_bridge_attach(encoder, next_bridge,
index 0f07f25..a977c9f 100644 (file)
@@ -37,7 +37,6 @@ static const struct drm_driver mgag200_driver = {
        .major = DRIVER_MAJOR,
        .minor = DRIVER_MINOR,
        .patchlevel = DRIVER_PATCHLEVEL,
-       .gem_create_object = drm_gem_shmem_create_object_cached,
        DRM_GEM_SHMEM_DRIVER_OPS,
 };
 
index 82cbaf3..9a7c49b 100644 (file)
@@ -211,10 +211,8 @@ int msm_gem_mmap_obj(struct drm_gem_object *obj,
                 * address_space (so unmap_mapping_range does what we want,
                 * in particular in the case of mmap'd dmabufs)
                 */
-               fput(vma->vm_file);
-               get_file(obj->filp);
                vma->vm_pgoff = 0;
-               vma->vm_file  = obj->filp;
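+               /* vma_set_file() swaps vm_file and fixes the reference counts */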
+               vma_set_file(vma, obj->filp);
 
                vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
        }
index 6faf17b..6da9355 100644 (file)
@@ -134,11 +134,8 @@ static int mxsfb_attach_bridge(struct mxsfb_drm_private *mxsfb)
                return -ENODEV;
 
        ret = drm_bridge_attach(&mxsfb->encoder, bridge, NULL, 0);
-       if (ret) {
-               DRM_DEV_ERROR(drm->dev,
-                             "failed to attach bridge: %d\n", ret);
-               return ret;
-       }
+       if (ret)
+               return dev_err_probe(drm->dev, ret, "Failed to attach bridge\n");
 
        mxsfb->bridge = bridge;
 
@@ -212,7 +209,8 @@ static int mxsfb_load(struct drm_device *drm,
 
        ret = mxsfb_attach_bridge(mxsfb);
        if (ret) {
-               dev_err(drm->dev, "Cannot connect bridge: %d\n", ret);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(drm->dev, "Cannot connect bridge: %d\n", ret);
                goto err_vblank;
        }
 
index 36d6b60..33fff38 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/hdmi.h>
 #include <linux/component.h>
 
+#include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_edid.h>
@@ -1161,8 +1162,10 @@ nv50_msto_new(struct drm_device *dev, struct nv50_head *head, int id)
 
 static struct drm_encoder *
 nv50_mstc_atomic_best_encoder(struct drm_connector *connector,
-                             struct drm_connector_state *connector_state)
+                             struct drm_atomic_state *state)
 {
+       struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state,
+                                                                                        connector);
        struct nv50_mstc *mstc = nv50_mstc(connector);
        struct drm_crtc *crtc = connector_state->crtc;
 
index 1386b0f..c85b1af 100644 (file)
@@ -942,16 +942,6 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
        struct nouveau_drm_tile *new_tile = NULL;
        int ret = 0;
 
-       if ((old_reg->mem_type == TTM_PL_SYSTEM &&
-            new_reg->mem_type == TTM_PL_VRAM) ||
-           (old_reg->mem_type == TTM_PL_VRAM &&
-            new_reg->mem_type == TTM_PL_SYSTEM)) {
-               hop->fpfn = 0;
-               hop->lpfn = 0;
-               hop->mem_type = TTM_PL_TT;
-               hop->flags = 0;
-               return -EMULTIHOP;
-       }
 
        if (new_reg->mem_type == TTM_PL_TT) {
                ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg);
@@ -995,14 +985,25 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
 
        /* Hardware assisted copy. */
        if (drm->ttm.move) {
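+               /*
+                * The copy engine cannot move directly between system
+                * memory and VRAM, so report a multihop and let TTM
+                * bounce the buffer through a TT placement first.
+                */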
+               if ((old_reg->mem_type == TTM_PL_SYSTEM &&
+                    new_reg->mem_type == TTM_PL_VRAM) ||
+                   (old_reg->mem_type == TTM_PL_VRAM &&
+                    new_reg->mem_type == TTM_PL_SYSTEM)) {
+                       hop->fpfn = 0;
+                       hop->lpfn = 0;
+                       hop->mem_type = TTM_PL_TT;
+                       hop->flags = 0;
+                       return -EMULTIHOP;
+               }
                ret = nouveau_bo_move_m2mf(bo, evict, ctx,
                                           new_reg);
-               if (!ret)
-                       goto out;
-       }
+       } else
+               ret = -ENODEV;
 
-       /* Fallback to software copy. */
-       ret = ttm_bo_move_memcpy(bo, ctx, new_reg);
+       if (ret) {
+               /* Fallback to software copy. */
+               ret = ttm_bo_move_memcpy(bo, ctx, new_reg);
+       }
 
 out:
        if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
index 68c271f..30d299c 100644 (file)
@@ -564,9 +564,8 @@ int omap_gem_mmap_obj(struct drm_gem_object *obj,
                 * address_space (so unmap_mapping_range does what we want,
                 * in particular in the case of mmap'd dmabufs)
                 */
-               fput(vma->vm_file);
                vma->vm_pgoff = 0;
-               vma->vm_file  = get_file(obj->filp);
+               vma_set_file(vma, obj->filp);
 
                vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
        }
index 210e70d..6b4e97b 100644 (file)
 #include "panel-samsung-s6e63m0.h"
 
 /* Manufacturer Command Set */
-#define MCS_ELVSS_ON                0xb1
-#define MCS_MIECTL1                0xc0
-#define MCS_BCMODE                              0xc1
+#define MCS_ELVSS_ON           0xb1
+#define MCS_TEMP_SWIRE         0xb2
+#define MCS_MIECTL1            0xc0
+#define MCS_BCMODE             0xc1
 #define MCS_ERROR_CHECK                0xd5
 #define MCS_READ_ID1           0xda
 #define MCS_READ_ID2           0xdb
 #define MCS_READ_ID3           0xdc
 #define MCS_LEVEL_2_KEY                0xf0
 #define MCS_MTP_KEY            0xf1
-#define MCS_DISCTL   0xf2
-#define MCS_SRCCTL           0xf6
-#define MCS_IFCTL                       0xf7
-#define MCS_PANELCTL         0xF8
-#define MCS_PGAMMACTL                   0xfa
+#define MCS_DISCTL             0xf2
+#define MCS_SRCCTL             0xf6
+#define MCS_IFCTL              0xf7
+#define MCS_PANELCTL           0xf8
+#define MCS_PGAMMACTL          0xfa
 
 #define S6E63M0_LCD_ID_VALUE_M2                0xA4
 #define S6E63M0_LCD_ID_VALUE_SM2       0xB4
 #define S6E63M0_LCD_ID_VALUE_SM2_1     0xB6
 
-#define NUM_GAMMA_LEVELS             11
-#define GAMMA_TABLE_COUNT           23
+#define NUM_GAMMA_LEVELS       28
+#define GAMMA_TABLE_COUNT      23
 
-#define MAX_BRIGHTNESS              (NUM_GAMMA_LEVELS - 1)
+#define MAX_BRIGHTNESS         (NUM_GAMMA_LEVELS - 1)
 
 /* array of gamma tables for gamma value 2.2 */
 static u8 const s6e63m0_gamma_22[NUM_GAMMA_LEVELS][GAMMA_TABLE_COUNT] = {
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x78, 0xEC, 0x3D, 0xC8,
-         0xC2, 0xB6, 0xC4, 0xC7, 0xB6, 0xD5, 0xD7,
-         0xCC, 0x00, 0x39, 0x00, 0x36, 0x00, 0x51 },
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x73, 0x4A, 0x3D, 0xC0,
-         0xC2, 0xB1, 0xBB, 0xBE, 0xAC, 0xCE, 0xCF,
-         0xC5, 0x00, 0x5D, 0x00, 0x5E, 0x00, 0x82 },
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x70, 0x51, 0x3E, 0xBF,
-         0xC1, 0xAF, 0xB9, 0xBC, 0xAB, 0xCC, 0xCC,
-         0xC2, 0x00, 0x65, 0x00, 0x67, 0x00, 0x8D },
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x6C, 0x54, 0x3A, 0xBC,
-         0xBF, 0xAC, 0xB7, 0xBB, 0xA9, 0xC9, 0xC9,
-         0xBE, 0x00, 0x71, 0x00, 0x73, 0x00, 0x9E },
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x69, 0x54, 0x37, 0xBB,
-         0xBE, 0xAC, 0xB4, 0xB7, 0xA6, 0xC7, 0xC8,
-         0xBC, 0x00, 0x7B, 0x00, 0x7E, 0x00, 0xAB },
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x66, 0x55, 0x34, 0xBA,
-         0xBD, 0xAB, 0xB1, 0xB5, 0xA3, 0xC5, 0xC6,
-         0xB9, 0x00, 0x85, 0x00, 0x88, 0x00, 0xBA },
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x63, 0x53, 0x31, 0xB8,
-         0xBC, 0xA9, 0xB0, 0xB5, 0xA2, 0xC4, 0xC4,
-         0xB8, 0x00, 0x8B, 0x00, 0x8E, 0x00, 0xC2 },
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x62, 0x54, 0x30, 0xB9,
-         0xBB, 0xA9, 0xB0, 0xB3, 0xA1, 0xC1, 0xC3,
-         0xB7, 0x00, 0x91, 0x00, 0x95, 0x00, 0xDA },
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x66, 0x58, 0x34, 0xB6,
-         0xBA, 0xA7, 0xAF, 0xB3, 0xA0, 0xC1, 0xC2,
-         0xB7, 0x00, 0x97, 0x00, 0x9A, 0x00, 0xD1 },
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x64, 0x56, 0x33, 0xB6,
-         0xBA, 0xA8, 0xAC, 0xB1, 0x9D, 0xC1, 0xC1,
-         0xB7, 0x00, 0x9C, 0x00, 0x9F, 0x00, 0xD6 },
-       { MCS_PGAMMACTL, 0x00,
-         0x18, 0x08, 0x24, 0x5f, 0x50, 0x2d, 0xB6,
-         0xB9, 0xA7, 0xAd, 0xB1, 0x9f, 0xbe, 0xC0,
-         0xB5, 0x00, 0xa0, 0x00, 0xa4, 0x00, 0xdb },
+       /* 30 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0xA1, 0x51, 0x7B, 0xCE,
+         0xCB, 0xC2, 0xC7, 0xCB, 0xBC, 0xDA, 0xDD,
+         0xD3, 0x00, 0x53, 0x00, 0x52, 0x00, 0x6F, },
+       /* 40 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x97, 0x58, 0x71, 0xCC,
+         0xCB, 0xC0, 0xC5, 0xC9, 0xBA, 0xD9, 0xDC,
+         0xD1, 0x00, 0x5B, 0x00, 0x5A, 0x00, 0x7A, },
+       /* 50 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x96, 0x58, 0x72, 0xCB,
+         0xCA, 0xBF, 0xC6, 0xC9, 0xBA, 0xD6, 0xD9,
+         0xCD, 0x00, 0x61, 0x00, 0x61, 0x00, 0x83, },
+       /* 60 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x91, 0x5E, 0x6E, 0xC9,
+         0xC9, 0xBD, 0xC4, 0xC9, 0xB8, 0xD3, 0xD7,
+         0xCA, 0x00, 0x69, 0x00, 0x67, 0x00, 0x8D, },
+       /* 70 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x8E, 0x62, 0x6B, 0xC7,
+         0xC9, 0xBB, 0xC3, 0xC7, 0xB7, 0xD3, 0xD7,
+         0xCA, 0x00, 0x6E, 0x00, 0x6C, 0x00, 0x94, },
+       /* 80 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x89, 0x68, 0x65, 0xC9,
+         0xC9, 0xBC, 0xC1, 0xC5, 0xB6, 0xD2, 0xD5,
+         0xC9, 0x00, 0x73, 0x00, 0x72, 0x00, 0x9A, },
+       /* 90 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x89, 0x69, 0x64, 0xC7,
+         0xC8, 0xBB, 0xC0, 0xC5, 0xB4, 0xD2, 0xD5,
+         0xC9, 0x00, 0x77, 0x00, 0x76, 0x00, 0xA0, },
+       /* 100 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x86, 0x69, 0x60, 0xC6,
+         0xC8, 0xBA, 0xBF, 0xC4, 0xB4, 0xD0, 0xD4,
+         0xC6, 0x00, 0x7C, 0x00, 0x7A, 0x00, 0xA7, },
+       /* 110 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x86, 0x6A, 0x60, 0xC5,
+         0xC7, 0xBA, 0xBD, 0xC3, 0xB2, 0xD0, 0xD4,
+         0xC5, 0x00, 0x80, 0x00, 0x7E, 0x00, 0xAD, },
+       /* 120 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x82, 0x6B, 0x5E, 0xC4,
+         0xC8, 0xB9, 0xBD, 0xC2, 0xB1, 0xCE, 0xD2,
+         0xC4, 0x00, 0x85, 0x00, 0x82, 0x00, 0xB3, },
+       /* 130 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x8C, 0x6C, 0x60, 0xC3,
+         0xC7, 0xB9, 0xBC, 0xC1, 0xAF, 0xCE, 0xD2,
+         0xC3, 0x00, 0x88, 0x00, 0x86, 0x00, 0xB8, },
+       /* 140 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x80, 0x6C, 0x5F, 0xC1,
+         0xC6, 0xB7, 0xBC, 0xC1, 0xAE, 0xCD, 0xD0,
+         0xC2, 0x00, 0x8C, 0x00, 0x8A, 0x00, 0xBE, },
+       /* 150 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x80, 0x6E, 0x5F, 0xC1,
+         0xC6, 0xB6, 0xBC, 0xC0, 0xAE, 0xCC, 0xD0,
+         0xC2, 0x00, 0x8F, 0x00, 0x8D, 0x00, 0xC2, },
+       /* 160 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x7F, 0x6E, 0x5F, 0xC0,
+         0xC6, 0xB5, 0xBA, 0xBF, 0xAD, 0xCB, 0xCF,
+         0xC0, 0x00, 0x94, 0x00, 0x91, 0x00, 0xC8, },
+       /* 170 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x7C, 0x6D, 0x5C, 0xC0,
+         0xC6, 0xB4, 0xBB, 0xBE, 0xAD, 0xCA, 0xCF,
+         0xC0, 0x00, 0x96, 0x00, 0x94, 0x00, 0xCC, },
+       /* 180 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x7B, 0x6D, 0x5B, 0xC0,
+         0xC5, 0xB3, 0xBA, 0xBE, 0xAD, 0xCA, 0xCE,
+         0xBF, 0x00, 0x99, 0x00, 0x97, 0x00, 0xD0, },
+       /* 190 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x7A, 0x6D, 0x59, 0xC1,
+         0xC5, 0xB4, 0xB8, 0xBD, 0xAC, 0xC9, 0xCE,
+         0xBE, 0x00, 0x9D, 0x00, 0x9A, 0x00, 0xD5, },
+       /* 200 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x79, 0x6D, 0x58, 0xC1,
+         0xC4, 0xB4, 0xB6, 0xBD, 0xAA, 0xCA, 0xCD,
+         0xBE, 0x00, 0x9F, 0x00, 0x9D, 0x00, 0xD9, },
+       /* 210 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x79, 0x6D, 0x57, 0xC0,
+         0xC4, 0xB4, 0xB7, 0xBD, 0xAA, 0xC8, 0xCC,
+         0xBD, 0x00, 0xA2, 0x00, 0xA0, 0x00, 0xDD, },
+       /* 220 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x78, 0x6F, 0x58, 0xBF,
+         0xC4, 0xB3, 0xB5, 0xBB, 0xA9, 0xC8, 0xCC,
+         0xBC, 0x00, 0xA6, 0x00, 0xA3, 0x00, 0xE2, },
+       /* 230 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x75, 0x6F, 0x56, 0xBF,
+         0xC3, 0xB2, 0xB6, 0xBB, 0xA8, 0xC7, 0xCB,
+         0xBC, 0x00, 0xA8, 0x00, 0xA6, 0x00, 0xE6, },
+       /* 240 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x76, 0x6F, 0x56, 0xC0,
+         0xC3, 0xB2, 0xB5, 0xBA, 0xA8, 0xC6, 0xCB,
+         0xBB, 0x00, 0xAA, 0x00, 0xA8, 0x00, 0xE9, },
+       /* 250 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x74, 0x6D, 0x54, 0xBF,
+         0xC3, 0xB2, 0xB4, 0xBA, 0xA7, 0xC6, 0xCA,
+         0xBA, 0x00, 0xAD, 0x00, 0xAB, 0x00, 0xED, },
+       /* 260 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x74, 0x6E, 0x54, 0xBD,
+         0xC2, 0xB0, 0xB5, 0xBA, 0xA7, 0xC5, 0xC9,
+         0xBA, 0x00, 0xB0, 0x00, 0xAE, 0x00, 0xF1, },
+       /* 270 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x71, 0x6C, 0x50, 0xBD,
+         0xC3, 0xB0, 0xB4, 0xB8, 0xA6, 0xC6, 0xC9,
+         0xBB, 0x00, 0xB2, 0x00, 0xB1, 0x00, 0xF4, },
+       /* 280 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x6E, 0x6C, 0x4D, 0xBE,
+         0xC3, 0xB1, 0xB3, 0xB8, 0xA5, 0xC6, 0xC8,
+         0xBB, 0x00, 0xB4, 0x00, 0xB3, 0x00, 0xF7, },
+       /* 290 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x71, 0x70, 0x50, 0xBD,
+         0xC1, 0xB0, 0xB2, 0xB8, 0xA4, 0xC6, 0xC7,
+         0xBB, 0x00, 0xB6, 0x00, 0xB6, 0x00, 0xFA, },
+       /* 300 cd */
+       { MCS_PGAMMACTL, 0x02,
+         0x18, 0x08, 0x24, 0x70, 0x6E, 0x4E, 0xBC,
+         0xC0, 0xAF, 0xB3, 0xB8, 0xA5, 0xC5, 0xC7,
+         0xBB, 0x00, 0xB9, 0x00, 0xB8, 0x00, 0xFC, },
+};
+
+#define NUM_ACL_LEVELS 7
+#define ACL_TABLE_COUNT 28
+
+static u8 const s6e63m0_acl[NUM_ACL_LEVELS][ACL_TABLE_COUNT] = {
+       /* NULL ACL */
+       { MCS_BCMODE,
+         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x00, 0x00, 0x00 },
+       /* 40P ACL */
+       { MCS_BCMODE,
+         0x4D, 0x96, 0x1D, 0x00, 0x00, 0x01, 0xDF, 0x00,
+         0x00, 0x03, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x01, 0x06, 0x0C, 0x11, 0x16, 0x1C, 0x21, 0x26,
+         0x2B, 0x31, 0x36 },
+       /* 43P ACL */
+       { MCS_BCMODE,
+         0x4D, 0x96, 0x1D, 0x00, 0x00, 0x01, 0xDF, 0x00,
+         0x00, 0x03, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x01, 0x07, 0x0C, 0x12, 0x18, 0x1E, 0x23, 0x29,
+         0x2F, 0x34, 0x3A },
+       /* 45P ACL */
+       { MCS_BCMODE,
+         0x4D, 0x96, 0x1D, 0x00, 0x00, 0x01, 0xDF, 0x00,
+         0x00, 0x03, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x01, 0x07, 0x0D, 0x13, 0x19, 0x1F, 0x25, 0x2B,
+         0x31, 0x37, 0x3D },
+       /* 47P ACL */
+       { MCS_BCMODE,
+         0x4D, 0x96, 0x1D, 0x00, 0x00, 0x01, 0xDF, 0x00,
+         0x00, 0x03, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x01, 0x07, 0x0E, 0x14, 0x1B, 0x21, 0x27, 0x2E,
+         0x34, 0x3B, 0x41 },
+       /* 48P ACL */
+       { MCS_BCMODE,
+         0x4D, 0x96, 0x1D, 0x00, 0x00, 0x01, 0xDF, 0x00,
+         0x00, 0x03, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x01, 0x08, 0x0E, 0x15, 0x1B, 0x22, 0x29, 0x2F,
+         0x36, 0x3C, 0x43 },
+       /* 50P ACL */
+       { MCS_BCMODE,
+         0x4D, 0x96, 0x1D, 0x00, 0x00, 0x01, 0xDF, 0x00,
+         0x00, 0x03, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x01, 0x08, 0x0F, 0x16, 0x1D, 0x24, 0x2A, 0x31,
+         0x38, 0x3F, 0x46 },
+};
+
+/* This tells us which ACL level goes with which gamma */
+static u8 const s6e63m0_acl_per_gamma[NUM_GAMMA_LEVELS] = {
+       /* 30 - 60 cd: ACL off/NULL */
+       0, 0, 0, 0,
+       /* 70 - 250 cd: 40P ACL */
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+       /* 260 - 300 cd: 50P ACL */
+       6, 6, 6, 6, 6,
+};
+
+/* The ELVSS backlight regulator has 5 levels */
+#define S6E63M0_ELVSS_LEVELS 5
+
+static u8 const s6e63m0_elvss_offsets[S6E63M0_ELVSS_LEVELS] = {
+       0x00,   /* not set */
+       0x0D,   /* 30 cd - 100 cd */
+       0x09,   /* 110 cd - 160 cd */
+       0x07,   /* 170 cd - 200 cd */
+       0x00,   /* 210 cd - 300 cd */
+};
+
+/* This tells us which ELVSS level goes with which gamma */
+static u8 const s6e63m0_elvss_per_gamma[NUM_GAMMA_LEVELS] = {
+       /* 30 - 100 cd */
+       1, 1, 1, 1, 1, 1, 1, 1,
+       /* 110 - 160 cd */
+       2, 2, 2, 2, 2, 2,
+       /* 170 - 200 cd */
+       3, 3, 3, 3,
+       /* 210 - 300 cd */
+       4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 };
 
 struct s6e63m0 {
@@ -102,6 +280,7 @@ struct s6e63m0 {
        struct drm_panel panel;
        struct backlight_device *bl_dev;
        u8 lcd_type;
+       u8 elvss_pulse;
 
        struct regulator_bulk_data supplies[2];
        struct gpio_desc *reset_gpio;
@@ -187,17 +366,25 @@ static int s6e63m0_check_lcd_type(struct s6e63m0 *ctx)
 
        dev_info(ctx->dev, "MTP ID: %02x %02x %02x\n", id1, id2, id3);
 
-       /* We attempt to detect what panel is mounted on the controller */
+       /*
+        * We attempt to detect what panel is mounted on the controller.
+        * The third ID byte represents the desired ELVSS pulse for
+        * some displays.
+        */
        switch (id2) {
        case S6E63M0_LCD_ID_VALUE_M2:
                dev_info(ctx->dev, "detected LCD panel AMS397GE MIPI M2\n");
+               ctx->elvss_pulse = id3;
                break;
        case S6E63M0_LCD_ID_VALUE_SM2:
        case S6E63M0_LCD_ID_VALUE_SM2_1:
                dev_info(ctx->dev, "detected LCD panel AMS397GE MIPI SM2\n");
+               ctx->elvss_pulse = id3;
                break;
        default:
                dev_info(ctx->dev, "unknown LCD panel type %02x\n", id2);
+               /* Default ELVSS pulse level */
+               ctx->elvss_pulse = 0x16;
                break;
        }
 
@@ -210,7 +397,7 @@ static void s6e63m0_init(struct s6e63m0 *ctx)
 {
        s6e63m0_dcs_write_seq_static(ctx, MCS_PANELCTL,
                                     0x01, 0x27, 0x27, 0x07, 0x07, 0x54, 0x9f,
-                                    0x63, 0x86, 0x1a, 0x33, 0x0d, 0x00, 0x00);
+                                    0x63, 0x8f, 0x1a, 0x33, 0x0d, 0x00, 0x00);
 
        s6e63m0_dcs_write_seq_static(ctx, MCS_DISCTL,
                                     0x02, 0x03, 0x1c, 0x10, 0x10);
@@ -226,9 +413,8 @@ static void s6e63m0_init(struct s6e63m0 *ctx)
                                     0x01);
 
        s6e63m0_dcs_write_seq_static(ctx, MCS_SRCCTL,
-                                    0x00, 0x8c, 0x07);
-       s6e63m0_dcs_write_seq_static(ctx, 0xb3,
-                                    0xc);
+                                    0x00, 0x8e, 0x07);
+       s6e63m0_dcs_write_seq_static(ctx, 0xb3, 0x6c);
 
        s6e63m0_dcs_write_seq_static(ctx, 0xb5,
                                     0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17,
@@ -247,9 +433,12 @@ static void s6e63m0_init(struct s6e63m0 *ctx)
                                     0x13, 0x1f, 0x1a, 0x2a, 0x24, 0x1f, 0x1b,
                                     0x1a, 0x17, 0x2b, 0x26, 0x22, 0x20, 0x3a,
                                     0x34, 0x30, 0x2c, 0x29, 0x26, 0x25, 0x23,
-                                    0x21, 0x20, 0x1e, 0x1e, 0x00, 0x00, 0x11,
-                                    0x22, 0x33, 0x44, 0x44, 0x44, 0x55, 0x55,
-                                    0x66, 0x66, 0x66, 0x66, 0x66, 0x66);
+                                    0x21, 0x20, 0x1e, 0x1e);
+
+       s6e63m0_dcs_write_seq_static(ctx, 0xb8,
+                                    0x00, 0x00, 0x11, 0x22, 0x33, 0x44, 0x44,
+                                    0x44, 0x55, 0x55, 0x66, 0x66, 0x66, 0x66,
+                                    0x66, 0x66);
 
        s6e63m0_dcs_write_seq_static(ctx, 0xb9,
                                     0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17,
@@ -269,7 +458,7 @@ static void s6e63m0_init(struct s6e63m0 *ctx)
                                     0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x06,
                                     0x09, 0x0d, 0x0f, 0x12, 0x15, 0x18);
 
-       s6e63m0_dcs_write_seq_static(ctx, 0xb2,
+       s6e63m0_dcs_write_seq_static(ctx, MCS_TEMP_SWIRE,
                                     0x10, 0x10, 0x0b, 0x05);
 
        s6e63m0_dcs_write_seq_static(ctx, MCS_MIECTL1,
@@ -447,15 +636,33 @@ static const struct drm_panel_funcs s6e63m0_drm_funcs = {
 static int s6e63m0_set_brightness(struct backlight_device *bd)
 {
        struct s6e63m0 *ctx = bl_get_data(bd);
-
        int brightness = bd->props.brightness;
-
-       /* disable and set new gamma */
+       u8 elvss_val;
+       u8 elvss_cmd_set[5];
+       int i;
+
+       /* Adjust ELVSS to candela level */
+       i = s6e63m0_elvss_per_gamma[brightness];
+       elvss_val = ctx->elvss_pulse + s6e63m0_elvss_offsets[i];
+       if (elvss_val > 0x1f)
+               elvss_val = 0x1f;
+       elvss_cmd_set[0] = MCS_TEMP_SWIRE;
+       elvss_cmd_set[1] = elvss_val;
+       elvss_cmd_set[2] = elvss_val;
+       elvss_cmd_set[3] = elvss_val;
+       elvss_cmd_set[4] = elvss_val;
+       s6e63m0_dcs_write(ctx, elvss_cmd_set, 5);
+
+       /* Update the ACL per gamma value */
+       i = s6e63m0_acl_per_gamma[brightness];
+       s6e63m0_dcs_write(ctx, s6e63m0_acl[i],
+                         ARRAY_SIZE(s6e63m0_acl[i]));
+
+       /* Update gamma table */
        s6e63m0_dcs_write(ctx, s6e63m0_gamma_22[brightness],
                          ARRAY_SIZE(s6e63m0_gamma_22[brightness]));
+       s6e63m0_dcs_write_seq_static(ctx, MCS_PGAMMACTL, 0x03);
 
-       /* update gamma table. */
-       s6e63m0_dcs_write_seq_static(ctx, MCS_PGAMMACTL, 0x01);
 
        return s6e63m0_clear_error(ctx);
 }
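A worked example of the new brightness path, using brightness index 10 (the
130 cd gamma entry) and the tables above:

	/* ELVSS: s6e63m0_elvss_per_gamma[10] == 2, so the offset is
	 * s6e63m0_elvss_offsets[2] == 0x09 and all four MCS_TEMP_SWIRE
	 * data bytes become min(ctx->elvss_pulse + 0x09, 0x1f).
	 * ACL:   s6e63m0_acl_per_gamma[10] == 1, selecting the 40P table.
	 * Gamma: s6e63m0_gamma_22[10] is written, then MCS_PGAMMACTL 0x03
	 * is sent to apply the new curve. */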
index 597f676..41bbec7 100644 (file)
@@ -2267,6 +2267,31 @@ static const struct panel_desc innolux_n116bge = {
        },
 };
 
+static const struct drm_display_mode innolux_n125hce_gn1_mode = {
+       .clock = 162000,
+       .hdisplay = 1920,
+       .hsync_start = 1920 + 40,
+       .hsync_end = 1920 + 40 + 40,
+       .htotal = 1920 + 40 + 40 + 80,
+       .vdisplay = 1080,
+       .vsync_start = 1080 + 4,
+       .vsync_end = 1080 + 4 + 4,
+       .vtotal = 1080 + 4 + 4 + 24,
+};
+
+static const struct panel_desc innolux_n125hce_gn1 = {
+       .modes = &innolux_n125hce_gn1_mode,
+       .num_modes = 1,
+       .bpc = 8,
+       .size = {
+               .width = 276,
+               .height = 155,
+       },
+       .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+       .bus_flags = DRM_BUS_FLAG_DATA_MSB_TO_LSB,
+       .connector_type = DRM_MODE_CONNECTOR_eDP,
+};
+
 static const struct drm_display_mode innolux_n156bge_l21_mode = {
        .clock = 69300,
        .hdisplay = 1366,
@@ -4123,6 +4148,9 @@ static const struct of_device_id platform_of_match[] = {
                .compatible = "innolux,n116bge",
                .data = &innolux_n116bge,
        }, {
+               .compatible = "innolux,n125hce-gn1",
+               .data = &innolux_n125hce_gn1,
+       }, {
                .compatible = "innolux,n156bge-l21",
                .data = &innolux_n156bge_l21,
        }, {
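The refresh rate implied by the new innolux_n125hce_gn1 mode follows directly
from the timing fields above:

	htotal  = 1920 + 40 + 40 + 80 = 2080
	vtotal  = 1080 +  4 +  4 + 24 = 1112
	refresh = clock * 1000 / (htotal * vtotal)
	        = 162000000 / (2080 * 1112) ~= 70 Hz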
index 57a31dd..3e0723b 100644 (file)
@@ -228,7 +228,7 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t
        INIT_LIST_HEAD(&obj->mappings.list);
        mutex_init(&obj->mappings.lock);
        obj->base.base.funcs = &panfrost_gem_funcs;
-       obj->base.map_cached = pfdev->coherent;
+       obj->base.map_wc = !pfdev->coherent;
 
        return &obj->base.base;
 }
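The panfrost flip is behaviour-preserving: the shmem helper's map_cached flag
was replaced by its complement map_wc, so the two assignments describe the same
mapping policy and non-coherent devices keep getting write-combined CPU
mappings:

	obj->base.map_cached = pfdev->coherent;   /* old field */
	obj->base.map_wc     = !pfdev->coherent;  /* new field, same effect */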
index 128c38c..7dd0c69 100644 (file)
@@ -115,7 +115,7 @@ static struct ttm_tt *qxl_ttm_tt_create(struct ttm_buffer_object *bo,
        ttm = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
        if (ttm == NULL)
                return NULL;
-       if (ttm_dma_tt_init(ttm, bo, page_flags, ttm_cached)) {
+       if (ttm_tt_init(ttm, bo, page_flags, ttm_cached)) {
                kfree(ttm);
                return NULL;
        }
index 57fb3eb..39c1c33 100644 (file)
@@ -155,7 +155,7 @@ int radeon_uvd_init(struct radeon_device *rdev)
                        family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
                        version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
                        version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
-                       DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
+                       DRM_INFO("Found UVD firmware Version: %u.%u Family ID: %u\n",
                                 version_major, version_minor, family_id);
 
                        /*
index 5e80064..a450497 100644 (file)
@@ -122,7 +122,7 @@ int radeon_vce_init(struct radeon_device *rdev)
        if (sscanf(c, "%2u]", &rdev->vce.fb_version) != 1)
                return -EINVAL;
 
-       DRM_INFO("Found VCE firmware/feedback version %hhd.%hhd.%hhd / %d!\n",
+       DRM_INFO("Found VCE firmware/feedback version %d.%d.%d / %d!\n",
                 start, mid, end, rdev->vce.fb_version);
 
        rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8);
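Both radeon hunks fix the same class of -Wformat warning. Varargs integers
undergo default argument promotion, so %d/%u always describe what is actually
passed, while the %hu/%hhd length modifiers claimed a narrower type than the
variables have:

	/* char/short promote to int in varargs, so %u matches what is
	 * pushed; %hhd/%hu only re-narrow the value and trip -Wformat
	 * when the source variable is not actually char/short sized. */
	unsigned int v = 7;
	printk("%u\n", v);	/* correct: matches the promoted type */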
index 1b96780..5455b20 100644 (file)
@@ -63,6 +63,9 @@ static atomic_long_t allocated_pages;
 static struct ttm_pool_type global_write_combined[MAX_ORDER];
 static struct ttm_pool_type global_uncached[MAX_ORDER];
 
+static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
+static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
+
 static spinlock_t shrinker_lock;
 static struct list_head shrinker_list;
 static struct shrinker mm_shrinker;
@@ -290,8 +293,14 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
 #ifdef CONFIG_X86
        switch (caching) {
        case ttm_write_combined:
+               if (pool->use_dma32)
+                       return &global_dma32_write_combined[order];
+
                return &global_write_combined[order];
        case ttm_uncached:
+               if (pool->use_dma32)
+                       return &global_dma32_uncached[order];
+
                return &global_uncached[order];
        default:
                break;
@@ -570,6 +579,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
        seq_puts(m, "uc\t:");
        ttm_pool_debugfs_orders(global_uncached, m);
 
+       seq_puts(m, "wc 32\t:");
+       ttm_pool_debugfs_orders(global_dma32_write_combined, m);
+       seq_puts(m, "uc 32\t:");
+       ttm_pool_debugfs_orders(global_dma32_uncached, m);
+
        for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
                seq_puts(m, "DMA ");
                switch (i) {
@@ -640,6 +654,11 @@ int ttm_pool_mgr_init(unsigned long num_pages)
                ttm_pool_type_init(&global_write_combined[i], NULL,
                                   ttm_write_combined, i);
                ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
+
+               ttm_pool_type_init(&global_dma32_write_combined[i], NULL,
+                                  ttm_write_combined, i);
+               ttm_pool_type_init(&global_dma32_uncached[i], NULL,
+                                  ttm_uncached, i);
        }
 
        mm_shrinker.count_objects = ttm_pool_shrinker_count;
@@ -660,6 +679,9 @@ void ttm_pool_mgr_fini(void)
        for (i = 0; i < MAX_ORDER; ++i) {
                ttm_pool_type_fini(&global_write_combined[i]);
                ttm_pool_type_fini(&global_uncached[i]);
+
+               ttm_pool_type_fini(&global_dma32_write_combined[i]);
+               ttm_pool_type_fini(&global_dma32_uncached[i]);
        }
 
        unregister_shrinker(&mm_shrinker);
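Separate DMA32 pools are needed because pool pages are recycled: a page handed
back must satisfy the same constraints a fresh GFP_DMA32 allocation would. The
lookup added in ttm_pool_select_type() reduces to:

	caching         use_dma32   pool
	--------------  ---------   ----------------------------------
	write-combined  false       global_write_combined[order]
	write-combined  true        global_dma32_write_combined[order]
	uncached        false       global_uncached[order]
	uncached        true        global_dma32_uncached[order]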
index b5a8dd9..9269092 100644 (file)
@@ -38,8 +38,6 @@ static const struct drm_driver driver = {
        .driver_features = DRIVER_ATOMIC | DRIVER_GEM | DRIVER_MODESET,
 
        /* GEM hooks */
-       .gem_create_object = drm_gem_shmem_create_object_cached,
-
        .fops = &udl_driver_fops,
        DRM_GEM_SHMEM_DRIVER_OPS,
 
index 8b52cb2..6a8731a 100644 (file)
@@ -78,7 +78,7 @@ struct drm_gem_object *v3d_create_object(struct drm_device *dev, size_t size)
        obj = &bo->base.base;
 
        obj->funcs = &v3d_gem_funcs;
-
+       bo->base.map_wc = true;
        INIT_LIST_HEAD(&bo->unref_head);
 
        return &bo->base.base;
index 34612ed..8aa5220 100644 (file)
@@ -273,8 +273,10 @@ static int vc4_txp_connector_atomic_check(struct drm_connector *conn,
 }
 
 static void vc4_txp_connector_atomic_commit(struct drm_connector *conn,
-                                       struct drm_connector_state *conn_state)
+                                       struct drm_atomic_state *state)
 {
+       struct drm_connector_state *conn_state = drm_atomic_get_new_connector_state(state,
+                                                                                   conn);
        struct vc4_txp *txp = connector_to_vc4_txp(conn);
        struct drm_gem_cma_object *gem;
        struct drm_display_mode *mode;
index 9a41309..f8635cc 100644 (file)
@@ -403,8 +403,7 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,
        if (ret)
                return ret;
 
-       fput(vma->vm_file);
-       vma->vm_file = get_file(obj->filp);
+       vma_set_file(vma, obj->filp);
        vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
 
index 24cc445..a3e0fb5 100644 (file)
@@ -364,6 +364,7 @@ int via_wait_irq(struct drm_device *dev, void *data, struct drm_file *file_priv)
                irqwait->request.sequence +=
                        atomic_read(&cur_irq->irq_received);
                irqwait->request.type &= ~_DRM_VBLANK_RELATIVE;
+               break;
        case VIA_IRQ_ABSOLUTE:
                break;
        default:
index 8d8135f..3d6e3a7 100644 (file)
@@ -1001,8 +1001,8 @@ via_verify_command_stream(const uint32_t * buf, unsigned int size,
                        state = via_check_vheader6(&buf, buf_end);
                        break;
                case state_command:
-                       if ((HALCYON_HEADER2 == (cmd = *buf)) &&
-                           supported_3d)
+                       cmd = *buf;
+                       if ((cmd == HALCYON_HEADER2) && supported_3d)
                                state = state_header2;
                        else if ((cmd & HALCYON_HEADER1MASK) == HALCYON_HEADER1)
                                state = state_header1;
@@ -1064,7 +1064,8 @@ via_parse_command_stream(struct drm_device *dev, const uint32_t *buf,
                        state = via_parse_vheader6(dev_priv, &buf, buf_end);
                        break;
                case state_command:
-                       if (HALCYON_HEADER2 == (cmd = *buf))
+                       cmd = *buf;
+                       if (cmd == HALCYON_HEADER2)
                                state = state_header2;
                        else if ((cmd & HALCYON_HEADER1MASK) == HALCYON_HEADER1)
                                state = state_header1;
index f336a8f..5fefc88 100644 (file)
@@ -67,8 +67,8 @@ virtio_gpu_debugfs_irq_info(struct seq_file *m, void *data)
        struct virtio_gpu_device *vgdev = node->minor->dev->dev_private;
 
        seq_printf(m, "fence %llu %lld\n",
-                  (u64)atomic64_read(&vgdev->fence_drv.last_seq),
-                  vgdev->fence_drv.sync_seq);
+                  (u64)atomic64_read(&vgdev->fence_drv.last_fence_id),
+                  vgdev->fence_drv.current_fence_id);
        return 0;
 }
 
index 3c0e172..6a23255 100644 (file)
@@ -127,8 +127,8 @@ typedef void (*virtio_gpu_resp_cb)(struct virtio_gpu_device *vgdev,
                                   struct virtio_gpu_vbuffer *vbuf);
 
 struct virtio_gpu_fence_driver {
-       atomic64_t       last_seq;
-       uint64_t         sync_seq;
+       atomic64_t       last_fence_id;
+       uint64_t         current_fence_id;
        uint64_t         context;
        struct list_head fences;
        spinlock_t       lock;
@@ -257,7 +257,7 @@ struct virtio_gpu_fpriv {
        struct mutex context_lock;
 };
 
-/* virtio_ioctl.c */
+/* virtgpu_ioctl.c */
 #define DRM_VIRTIO_NUM_IOCTLS 11
 extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS];
 void virtio_gpu_create_context(struct drm_device *dev, struct drm_file *file);
@@ -420,7 +420,7 @@ void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
                          struct virtio_gpu_ctrl_hdr *cmd_hdr,
                          struct virtio_gpu_fence *fence);
 void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev,
-                                   u64 last_seq);
+                                   u64 fence_id);
 
 /* virtgpu_object.c */
 void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo);
index 5b2a414..728ca36 100644 (file)
@@ -48,7 +48,7 @@ static bool virtio_fence_signaled(struct dma_fence *f)
                /* leaked fence outside driver before completing
                 * initialization with virtio_gpu_fence_emit */
                return false;
-       if (atomic64_read(&fence->drv->last_seq) >= fence->f.seqno)
+       if (atomic64_read(&fence->drv->last_fence_id) >= fence->f.seqno)
                return true;
        return false;
 }
@@ -62,7 +62,8 @@ static void virtio_timeline_value_str(struct dma_fence *f, char *str, int size)
 {
        struct virtio_gpu_fence *fence = to_virtio_fence(f);
 
-       snprintf(str, size, "%llu", (u64)atomic64_read(&fence->drv->last_seq));
+       snprintf(str, size, "%llu",
+                (u64)atomic64_read(&fence->drv->last_fence_id));
 }
 
 static const struct dma_fence_ops virtio_fence_ops = {
@@ -100,7 +101,7 @@ void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
        unsigned long irq_flags;
 
        spin_lock_irqsave(&drv->lock, irq_flags);
-       fence->f.seqno = ++drv->sync_seq;
+       fence->f.seqno = ++drv->current_fence_id;
        dma_fence_get(&fence->f);
        list_add_tail(&fence->node, &drv->fences);
        spin_unlock_irqrestore(&drv->lock, irq_flags);
@@ -112,16 +113,16 @@ void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 }
 
 void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev,
-                                   u64 last_seq)
+                                   u64 fence_id)
 {
        struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
        struct virtio_gpu_fence *fence, *tmp;
        unsigned long irq_flags;
 
        spin_lock_irqsave(&drv->lock, irq_flags);
-       atomic64_set(&vgdev->fence_drv.last_seq, last_seq);
+       atomic64_set(&vgdev->fence_drv.last_fence_id, fence_id);
        list_for_each_entry_safe(fence, tmp, &drv->fences, node) {
-               if (last_seq < fence->f.seqno)
+               if (fence_id < fence->f.seqno)
                        continue;
                dma_fence_signal_locked(&fence->f);
                list_del(&fence->node);
index 5417f36..23eb6d7 100644 (file)
@@ -591,8 +591,9 @@ static int verify_blob(struct virtio_gpu_device *vgdev,
        return 0;
 }
 
-static int virtio_gpu_resource_create_blob(struct drm_device *dev,
-                                          void *data, struct drm_file *file)
+static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev,
+                                                void *data,
+                                                struct drm_file *file)
 {
        int ret = 0;
        uint32_t handle = 0;
@@ -696,6 +697,6 @@ struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = {
                          DRM_RENDER_ALLOW),
 
        DRM_IOCTL_DEF_DRV(VIRTGPU_RESOURCE_CREATE_BLOB,
-                         virtio_gpu_resource_create_blob,
+                         virtio_gpu_resource_create_blob_ioctl,
                          DRM_RENDER_ALLOW),
 };
index d9ad27e..d69a5b6 100644 (file)
@@ -144,7 +144,6 @@ struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev,
 
        dshmem = &shmem->base.base;
        dshmem->base.funcs = &virtio_gpu_shmem_funcs;
-       dshmem->map_cached = true;
        return &dshmem->base;
 }
 
index 1a1b5bc..d4d3922 100644 (file)
@@ -82,7 +82,6 @@ static const struct drm_driver vkms_driver = {
        .driver_features        = DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_GEM,
        .release                = vkms_release,
        .fops                   = &vkms_driver_fops,
-       .gem_create_object = drm_gem_shmem_create_object_cached,
        DRM_GEM_SHMEM_DRIVER_OPS,
 
        .name                   = DRIVER_NAME,
index 67f80ab..78fdc1d 100644 (file)
@@ -2,6 +2,7 @@
 
 #include <linux/dma-buf-map.h>
 
+#include <drm/drm_atomic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_writeback.h>
 #include <drm/drm_probe_helper.h>
@@ -105,8 +106,10 @@ static void vkms_wb_cleanup_job(struct drm_writeback_connector *connector,
 }
 
 static void vkms_wb_atomic_commit(struct drm_connector *conn,
-                                 struct drm_connector_state *state)
+                                 struct drm_atomic_state *state)
 {
+       struct drm_connector_state *connector_state = drm_atomic_get_new_connector_state(state,
+                                                                                        conn);
        struct vkms_device *vkmsdev = drm_device_to_vkms_device(conn->dev);
        struct vkms_output *output = &vkmsdev->output;
        struct drm_writeback_connector *wb_conn = &output->wb_connector;
@@ -122,7 +125,7 @@ static void vkms_wb_atomic_commit(struct drm_connector *conn,
        crtc_state->active_writeback = conn_state->writeback_job->priv;
        crtc_state->wb_pending = true;
        spin_unlock_irq(&output->composer_lock);
-       drm_writeback_queue_job(wb_conn, state);
+       drm_writeback_queue_job(wb_conn, connector_state);
 }
 
 static const struct drm_connector_helper_funcs vkms_wb_conn_helper_funcs = {
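The vc4 and vkms hunks adapt to the same helper-callback change:
drm_connector_helper_funcs::atomic_commit now receives the full
drm_atomic_state, and each connector fetches its own new state. Reduced to its
shape; the example_* names are hypothetical, the drm_* calls are real:

	static void example_wb_atomic_commit(struct drm_connector *conn,
					     struct drm_atomic_state *state)
	{
		struct drm_connector_state *conn_state =
			drm_atomic_get_new_connector_state(state, conn);
		struct drm_writeback_connector *wb_conn =
			example_get_wb_connector(conn);	/* driver-specific */

		if (!conn_state->writeback_job)
			return;
		drm_writeback_queue_job(wb_conn, conn_state);
	}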
index fa69b94..7596dc1 100644 (file)
@@ -355,7 +355,7 @@ static int ssi_add_controller(struct hsi_controller *ssi,
 
        err = ida_simple_get(&platform_omap_ssi_ida, 0, 0, GFP_KERNEL);
        if (err < 0)
-               goto out_err;
+               return err;
        ssi->id = err;
 
        ssi->owner = THIS_MODULE;
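The early return matters because ssi->id has not been assigned yet: jumping to
the shared out_err unwind (which presumably releases the ida entry for ssi->id)
would operate on an id this call never allocated. Returning directly skips
cleanup that does not apply:

	err = ida_simple_get(&platform_omap_ssi_ida, 0, 0, GFP_KERNEL);
	if (err < 0)
		return err;	/* nothing acquired yet, nothing to unwind */
	ssi->id = err;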
index 47f0208..c3fb5be 100644 (file)
@@ -352,7 +352,7 @@ static void hsi_port_release(struct device *dev)
 }
 
 /**
- * hsi_unregister_port - Unregister an HSI port
+ * hsi_port_unregister_clients - Unregister the clients of an HSI port
  * @port: The HSI port to unregister
  */
 void hsi_port_unregister_clients(struct hsi_port *port)
index a250481..3bc2551 100644 (file)
  *   convert raw register values is from https://github.com/ocerman/zenpower.
  *   The information is not confirmed from chip datasheets, but experiments
  *   suggest that it provides reasonable temperature values.
- * - Register addresses to read chip voltage and current are also from
- *   https://github.com/ocerman/zenpower, and not confirmed from chip
- *   datasheets. Current calibration is board specific and not typically
- *   shared by board vendors. For this reason, current values are
- *   normalized to report 1A/LSB for core current and and 0.25A/LSB for SoC
- *   current. Reported values can be adjusted using the sensors configuration
- *   file.
  */
 
 #include <linux/bitops.h>
@@ -109,10 +102,7 @@ struct k10temp_data {
        int temp_offset;
        u32 temp_adjust_mask;
        u32 show_temp;
-       u32 svi_addr[2];
        bool is_zen;
-       bool show_current;
-       int cfactor[2];
 };
 
 #define TCTL_BIT       0
@@ -137,16 +127,6 @@ static const struct tctl_offset tctl_offset_table[] = {
        { 0x17, "AMD Ryzen Threadripper 29", 27000 }, /* 29{20,50,70,90}[W]X */
 };
 
-static bool is_threadripper(void)
-{
-       return strstr(boot_cpu_data.x86_model_id, "Threadripper");
-}
-
-static bool is_epyc(void)
-{
-       return strstr(boot_cpu_data.x86_model_id, "EPYC");
-}
-
 static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
 {
        pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
@@ -211,16 +191,6 @@ static const char *k10temp_temp_label[] = {
        "Tccd8",
 };
 
-static const char *k10temp_in_label[] = {
-       "Vcore",
-       "Vsoc",
-};
-
-static const char *k10temp_curr_label[] = {
-       "Icore",
-       "Isoc",
-};
-
 static int k10temp_read_labels(struct device *dev,
                               enum hwmon_sensor_types type,
                               u32 attr, int channel, const char **str)
@@ -229,50 +199,6 @@ static int k10temp_read_labels(struct device *dev,
        case hwmon_temp:
                *str = k10temp_temp_label[channel];
                break;
-       case hwmon_in:
-               *str = k10temp_in_label[channel];
-               break;
-       case hwmon_curr:
-               *str = k10temp_curr_label[channel];
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-       return 0;
-}
-
-static int k10temp_read_curr(struct device *dev, u32 attr, int channel,
-                            long *val)
-{
-       struct k10temp_data *data = dev_get_drvdata(dev);
-       u32 regval;
-
-       switch (attr) {
-       case hwmon_curr_input:
-               amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
-                            data->svi_addr[channel], &regval);
-               *val = DIV_ROUND_CLOSEST(data->cfactor[channel] *
-                                        (regval & 0xff),
-                                        1000);
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-       return 0;
-}
-
-static int k10temp_read_in(struct device *dev, u32 attr, int channel, long *val)
-{
-       struct k10temp_data *data = dev_get_drvdata(dev);
-       u32 regval;
-
-       switch (attr) {
-       case hwmon_in_input:
-               amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
-                            data->svi_addr[channel], &regval);
-               regval = (regval >> 16) & 0xff;
-               *val = DIV_ROUND_CLOSEST(155000 - regval * 625, 100);
-               break;
        default:
                return -EOPNOTSUPP;
        }
@@ -331,10 +257,6 @@ static int k10temp_read(struct device *dev, enum hwmon_sensor_types type,
        switch (type) {
        case hwmon_temp:
                return k10temp_read_temp(dev, attr, channel, val);
-       case hwmon_in:
-               return k10temp_read_in(dev, attr, channel, val);
-       case hwmon_curr:
-               return k10temp_read_curr(dev, attr, channel, val);
        default:
                return -EOPNOTSUPP;
        }
@@ -383,11 +305,6 @@ static umode_t k10temp_is_visible(const void *_data,
                        return 0;
                }
                break;
-       case hwmon_in:
-       case hwmon_curr:
-               if (!data->show_current)
-                       return 0;
-               break;
        default:
                return 0;
        }
@@ -517,20 +434,10 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                case 0x8:       /* Zen+ */
                case 0x11:      /* Zen APU */
                case 0x18:      /* Zen+ APU */
-                       data->show_current = !is_threadripper() && !is_epyc();
-                       data->svi_addr[0] = F17H_M01H_SVI_TEL_PLANE0;
-                       data->svi_addr[1] = F17H_M01H_SVI_TEL_PLANE1;
-                       data->cfactor[0] = F17H_M01H_CFACTOR_ICORE;
-                       data->cfactor[1] = F17H_M01H_CFACTOR_ISOC;
                        k10temp_get_ccd_support(pdev, data, 4);
                        break;
                case 0x31:      /* Zen2 Threadripper */
                case 0x71:      /* Zen2 */
-                       data->show_current = !is_threadripper() && !is_epyc();
-                       data->cfactor[0] = F17H_M31H_CFACTOR_ICORE;
-                       data->cfactor[1] = F17H_M31H_CFACTOR_ISOC;
-                       data->svi_addr[0] = F17H_M31H_SVI_TEL_PLANE0;
-                       data->svi_addr[1] = F17H_M31H_SVI_TEL_PLANE1;
                        k10temp_get_ccd_support(pdev, data, 8);
                        break;
                }
@@ -542,11 +449,6 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
                switch (boot_cpu_data.x86_model) {
                case 0x0 ... 0x1:       /* Zen3 */
-                       data->show_current = true;
-                       data->svi_addr[0] = F19H_M01_SVI_TEL_PLANE0;
-                       data->svi_addr[1] = F19H_M01_SVI_TEL_PLANE1;
-                       data->cfactor[0] = F19H_M01H_CFACTOR_ICORE;
-                       data->cfactor[1] = F19H_M01H_CFACTOR_ISOC;
                        k10temp_get_ccd_support(pdev, data, 8);
                        break;
                }
index 1c6b78a..b61bf53 100644 (file)
@@ -2537,7 +2537,7 @@ int i3c_master_register(struct i3c_master_controller *master,
 
        ret = i3c_master_bus_init(master);
        if (ret)
-               goto err_put_dev;
+               goto err_destroy_wq;
 
        ret = device_add(&master->dev);
        if (ret)
@@ -2568,6 +2568,9 @@ err_del_dev:
 err_cleanup_bus:
        i3c_master_bus_cleanup(master);
 
+err_destroy_wq:
+       destroy_workqueue(master->wq);
+
 err_put_dev:
        put_device(&master->dev);
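The fix restores strict reverse-order unwinding: a failure in
i3c_master_bus_init() must first destroy the workqueue created earlier in the
function before dropping the device reference. The skeleton of the pattern;
setup_workqueue() is a stand-in for the alloc_workqueue() call earlier in
i3c_master_register():

	ret = setup_workqueue(master);
	if (ret)
		goto err_put_dev;

	ret = i3c_master_bus_init(master);
	if (ret)
		goto err_destroy_wq;	/* undo in reverse order */

	return 0;

err_destroy_wq:
	destroy_workqueue(master->wq);
err_put_dev:
	put_device(&master->dev);
	return ret;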
 
index 4e80a1f..e68f15f 100644 (file)
@@ -21,3 +21,16 @@ config DW_I3C_MASTER
 
          This driver can also be built as a module.  If so, the module
          will be called dw-i3c-master.
+
+config MIPI_I3C_HCI
+       tristate "MIPI I3C Host Controller Interface driver (EXPERIMENTAL)"
+       depends on I3C
+       help
+         Support for hardware following the MIPI Alliance's I3C Host Controller
+         Interface specification.
+
+         For details please see:
+         https://www.mipi.org/specifications/i3c-hci
+
+         This driver can also be built as a module.  If so, the module will be
+         called mipi-i3c-hci.
index 7eea9e0..b892fd4 100644 (file)
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_CDNS_I3C_MASTER)          += i3c-master-cdns.o
 obj-$(CONFIG_DW_I3C_MASTER)            += dw-i3c-master.o
+obj-$(CONFIG_MIPI_I3C_HCI)             += mipi-i3c-hci/
diff --git a/drivers/i3c/master/mipi-i3c-hci/Makefile b/drivers/i3c/master/mipi-i3c-hci/Makefile
new file mode 100644 (file)
index 0000000..a658e7b
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: BSD-3-Clause
+
+obj-$(CONFIG_MIPI_I3C_HCI)             += mipi-i3c-hci.o
+mipi-i3c-hci-y                         := core.o ext_caps.o pio.o dma.o \
+                                          cmd_v1.o cmd_v2.o \
+                                          dat_v1.o dct_v1.o
diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd.h b/drivers/i3c/master/mipi-i3c-hci/cmd.h
new file mode 100644 (file)
index 0000000..1d6dd2c
--- /dev/null
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * Common command/response related stuff
+ */
+
+#ifndef CMD_H
+#define CMD_H
+
+/*
+ * Those bits are common to all descriptor formats and
+ * may be manipulated by the core code.
+ */
+#define CMD_0_TOC                      W0_BIT_(31)
+#define CMD_0_ROC                      W0_BIT_(30)
+#define CMD_0_ATTR                     W0_MASK(2, 0)
+
+/*
+ * Response Descriptor Structure
+ */
+#define RESP_STATUS(resp)              FIELD_GET(GENMASK(31, 28), resp)
+#define RESP_TID(resp)                 FIELD_GET(GENMASK(27, 24), resp)
+#define RESP_DATA_LENGTH(resp)         FIELD_GET(GENMASK(21,  0), resp)
+
+#define RESP_ERR_FIELD                 GENMASK(31, 28)
+
+enum hci_resp_err {
+       RESP_SUCCESS                    = 0x0,
+       RESP_ERR_CRC                    = 0x1,
+       RESP_ERR_PARITY                 = 0x2,
+       RESP_ERR_FRAME                  = 0x3,
+       RESP_ERR_ADDR_HEADER            = 0x4,
+       RESP_ERR_BCAST_NACK_7E          = 0x4,
+       RESP_ERR_NACK                   = 0x5,
+       RESP_ERR_OVL                    = 0x6,
+       RESP_ERR_I3C_SHORT_READ         = 0x7,
+       RESP_ERR_HC_TERMINATED          = 0x8,
+       RESP_ERR_I2C_WR_DATA_NACK       = 0x9,
+       RESP_ERR_BUS_XFER_ABORTED       = 0x9,
+       RESP_ERR_NOT_SUPPORTED          = 0xa,
+       RESP_ERR_ABORTED_WITH_CRC       = 0xb,
+       /* 0xc to 0xf are reserved for transfer specific errors */
+};
+
+/* TID generation (4 bits wide in all cases) */
+#define hci_get_tid() \
+       (atomic_inc_return_relaxed(&hci->next_cmd_tid) % (1U << 4))
+
+/* This abstracts operations with our command descriptor formats */
+struct hci_cmd_ops {
+       int (*prep_ccc)(struct i3c_hci *hci, struct hci_xfer *xfer,
+                       u8 ccc_addr, u8 ccc_cmd, bool raw);
+       void (*prep_i3c_xfer)(struct i3c_hci *hci, struct i3c_dev_desc *dev,
+                             struct hci_xfer *xfer);
+       void (*prep_i2c_xfer)(struct i3c_hci *hci, struct i2c_dev_desc *dev,
+                             struct hci_xfer *xfer);
+       int (*perform_daa)(struct i3c_hci *hci);
+};
+
+/* Our various instances */
+extern const struct hci_cmd_ops mipi_i3c_hci_cmd_v1;
+extern const struct hci_cmd_ops mipi_i3c_hci_cmd_v2;
+
+#endif
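One subtlety in the TID helper above: the per-controller counter free-runs and
the modulo keeps only the low 4 bits, matching the 4-bit CMD_*_TID and RESP_TID
fields, so transaction IDs cycle through 0..15 and are unique only among
transfers currently in flight:

	/* inc from 14 returns 15 -> TID 15; inc from 15 returns 16 -> TID 0 */
	tid = atomic_inc_return_relaxed(&hci->next_cmd_tid) % (1U << 4);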
diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd_v1.c b/drivers/i3c/master/mipi-i3c-hci/cmd_v1.c
new file mode 100644 (file)
index 0000000..d97c317
--- /dev/null
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * I3C HCI v1.0/v1.1 Command Descriptor Handling
+ */
+
+#include <linux/bitfield.h>
+#include <linux/i3c/master.h>
+
+#include "hci.h"
+#include "cmd.h"
+#include "dat.h"
+#include "dct.h"
+
+
+/*
+ * Address Assignment Command
+ */
+
+#define CMD_0_ATTR_A                   FIELD_PREP(CMD_0_ATTR, 0x2)
+
+#define CMD_A0_TOC                                W0_BIT_(31)
+#define CMD_A0_ROC                                W0_BIT_(30)
+#define CMD_A0_DEV_COUNT(v)            FIELD_PREP(W0_MASK(29, 26), v)
+#define CMD_A0_DEV_INDEX(v)            FIELD_PREP(W0_MASK(20, 16), v)
+#define CMD_A0_CMD(v)                  FIELD_PREP(W0_MASK(14,  7), v)
+#define CMD_A0_TID(v)                  FIELD_PREP(W0_MASK( 6,  3), v)
+
+/*
+ * Immediate Data Transfer Command
+ */
+
+#define CMD_0_ATTR_I                   FIELD_PREP(CMD_0_ATTR, 0x1)
+
+#define CMD_I1_DATA_BYTE_4(v)          FIELD_PREP(W1_MASK(63, 56), v)
+#define CMD_I1_DATA_BYTE_3(v)          FIELD_PREP(W1_MASK(55, 48), v)
+#define CMD_I1_DATA_BYTE_2(v)          FIELD_PREP(W1_MASK(47, 40), v)
+#define CMD_I1_DATA_BYTE_1(v)          FIELD_PREP(W1_MASK(39, 32), v)
+#define CMD_I1_DEF_BYTE(v)             FIELD_PREP(W1_MASK(39, 32), v)
+#define CMD_I0_TOC                                W0_BIT_(31)
+#define CMD_I0_ROC                                W0_BIT_(30)
+#define CMD_I0_RNW                                W0_BIT_(29)
+#define CMD_I0_MODE(v)                 FIELD_PREP(W0_MASK(28, 26), v)
+#define CMD_I0_DTT(v)                  FIELD_PREP(W0_MASK(25, 23), v)
+#define CMD_I0_DEV_INDEX(v)            FIELD_PREP(W0_MASK(20, 16), v)
+#define CMD_I0_CP                                 W0_BIT_(15)
+#define CMD_I0_CMD(v)                  FIELD_PREP(W0_MASK(14,  7), v)
+#define CMD_I0_TID(v)                  FIELD_PREP(W0_MASK( 6,  3), v)
+
+/*
+ * Regular Data Transfer Command
+ */
+
+#define CMD_0_ATTR_R                   FIELD_PREP(CMD_0_ATTR, 0x0)
+
+#define CMD_R1_DATA_LENGTH(v)          FIELD_PREP(W1_MASK(63, 48), v)
+#define CMD_R1_DEF_BYTE(v)             FIELD_PREP(W1_MASK(39, 32), v)
+#define CMD_R0_TOC                                W0_BIT_(31)
+#define CMD_R0_ROC                                W0_BIT_(30)
+#define CMD_R0_RNW                                W0_BIT_(29)
+#define CMD_R0_MODE(v)                 FIELD_PREP(W0_MASK(28, 26), v)
+#define CMD_R0_DBP                                W0_BIT_(25)
+#define CMD_R0_DEV_INDEX(v)            FIELD_PREP(W0_MASK(20, 16), v)
+#define CMD_R0_CP                                 W0_BIT_(15)
+#define CMD_R0_CMD(v)                  FIELD_PREP(W0_MASK(14,  7), v)
+#define CMD_R0_TID(v)                  FIELD_PREP(W0_MASK( 6,  3), v)
+
+/*
+ * Combo Transfer (Write + Write/Read) Command
+ */
+
+#define CMD_0_ATTR_C                   FIELD_PREP(CMD_0_ATTR, 0x3)
+
+#define CMD_C1_DATA_LENGTH(v)          FIELD_PREP(W1_MASK(63, 48), v)
+#define CMD_C1_OFFSET(v)               FIELD_PREP(W1_MASK(47, 32), v)
+#define CMD_C0_TOC                                W0_BIT_(31)
+#define CMD_C0_ROC                                W0_BIT_(30)
+#define CMD_C0_RNW                                W0_BIT_(29)
+#define CMD_C0_MODE(v)                 FIELD_PREP(W0_MASK(28, 26), v)
+#define CMD_C0_16_BIT_SUBOFFSET                           W0_BIT_(25)
+#define CMD_C0_FIRST_PHASE_MODE                           W0_BIT_(24)
+#define CMD_C0_DATA_LENGTH_POSITION(v) FIELD_PREP(W0_MASK(23, 22), v)
+#define CMD_C0_DEV_INDEX(v)            FIELD_PREP(W0_MASK(20, 16), v)
+#define CMD_C0_CP                                 W0_BIT_(15)
+#define CMD_C0_CMD(v)                  FIELD_PREP(W0_MASK(14,  7), v)
+#define CMD_C0_TID(v)                  FIELD_PREP(W0_MASK( 6,  3), v)
+
+/*
+ * Internal Control Command
+ */
+
+#define CMD_0_ATTR_M                   FIELD_PREP(CMD_0_ATTR, 0x7)
+
+#define CMD_M1_VENDOR_SPECIFIC                    W1_MASK(63, 32)
+#define CMD_M0_MIPI_RESERVED                      W0_MASK(31, 12)
+#define CMD_M0_MIPI_CMD                                   W0_MASK(11,  8)
+#define CMD_M0_VENDOR_INFO_PRESENT                W0_BIT_( 7)
+#define CMD_M0_TID(v)                  FIELD_PREP(W0_MASK( 6,  3), v)
+
+
+/* Data Transfer Speed and Mode */
+enum hci_cmd_mode {
+       MODE_I3C_SDR0           = 0x0,
+       MODE_I3C_SDR1           = 0x1,
+       MODE_I3C_SDR2           = 0x2,
+       MODE_I3C_SDR3           = 0x3,
+       MODE_I3C_SDR4           = 0x4,
+       MODE_I3C_HDR_TSx        = 0x5,
+       MODE_I3C_HDR_DDR        = 0x6,
+       MODE_I3C_HDR_BT         = 0x7,
+       MODE_I3C_Fm_FmP         = 0x8,
+       MODE_I2C_Fm             = 0x0,
+       MODE_I2C_FmP            = 0x1,
+       MODE_I2C_UD1            = 0x2,
+       MODE_I2C_UD2            = 0x3,
+       MODE_I2C_UD3            = 0x4,
+};
+
+static enum hci_cmd_mode get_i3c_mode(struct i3c_hci *hci)
+{
+       struct i3c_bus *bus = i3c_master_get_bus(&hci->master);
+
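+       /*
+        * Map the negotiated SCL rate onto an SDR speed grade, fastest
+        * grade first; at 2 MHz or below no SDR mode fits and the
+        * transfer falls back to Fm/Fm+ timing.
+        */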
+       if (bus->scl_rate.i3c >= 12500000)
+               return MODE_I3C_SDR0;
+       if (bus->scl_rate.i3c > 8000000)
+               return MODE_I3C_SDR1;
+       if (bus->scl_rate.i3c > 6000000)
+               return MODE_I3C_SDR2;
+       if (bus->scl_rate.i3c > 4000000)
+               return MODE_I3C_SDR3;
+       if (bus->scl_rate.i3c > 2000000)
+               return MODE_I3C_SDR4;
+       return MODE_I3C_Fm_FmP;
+}
+
+static enum hci_cmd_mode get_i2c_mode(struct i3c_hci *hci)
+{
+       struct i3c_bus *bus = i3c_master_get_bus(&hci->master);
+
+       if (bus->scl_rate.i2c >= 1000000)
+               return MODE_I2C_FmP;
+       return MODE_I2C_Fm;
+}
+
+static void fill_data_bytes(struct hci_xfer *xfer, u8 *data,
+                           unsigned int data_len)
+{
+       xfer->cmd_desc[1] = 0;
+       switch (data_len) {
+       case 4:
+               xfer->cmd_desc[1] |= CMD_I1_DATA_BYTE_4(data[3]);
+               fallthrough;
+       case 3:
+               xfer->cmd_desc[1] |= CMD_I1_DATA_BYTE_3(data[2]);
+               fallthrough;
+       case 2:
+               xfer->cmd_desc[1] |= CMD_I1_DATA_BYTE_2(data[1]);
+               fallthrough;
+       case 1:
+               xfer->cmd_desc[1] |= CMD_I1_DATA_BYTE_1(data[0]);
+               fallthrough;
+       case 0:
+               break;
+       }
+       /* we consumed all the data with the cmd descriptor */
+       xfer->data = NULL;
+}
+
+static int hci_cmd_v1_prep_ccc(struct i3c_hci *hci,
+                              struct hci_xfer *xfer,
+                              u8 ccc_addr, u8 ccc_cmd, bool raw)
+{
+       unsigned int dat_idx = 0;
+       enum hci_cmd_mode mode = get_i3c_mode(hci);
+       u8 *data = xfer->data;
+       unsigned int data_len = xfer->data_len;
+       bool rnw = xfer->rnw;
+       int ret;
+
+       /* this should never happen */
+       if (WARN_ON(raw))
+               return -EINVAL;
+
+       if (ccc_addr != I3C_BROADCAST_ADDR) {
+               ret = mipi_i3c_hci_dat_v1.get_index(hci, ccc_addr);
+               if (ret < 0)
+                       return ret;
+               dat_idx = ret;
+       }
+
+       xfer->cmd_tid = hci_get_tid();
+
+       if (!rnw && data_len <= 4) {
+               /* we use an Immediate Data Transfer Command */
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_I |
+                       CMD_I0_TID(xfer->cmd_tid) |
+                       CMD_I0_CMD(ccc_cmd) | CMD_I0_CP |
+                       CMD_I0_DEV_INDEX(dat_idx) |
+                       CMD_I0_DTT(data_len) |
+                       CMD_I0_MODE(mode);
+               fill_data_bytes(xfer, data, data_len);
+       } else {
+               /* we use a Regular Data Transfer Command */
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_R |
+                       CMD_R0_TID(xfer->cmd_tid) |
+                       CMD_R0_CMD(ccc_cmd) | CMD_R0_CP |
+                       CMD_R0_DEV_INDEX(dat_idx) |
+                       CMD_R0_MODE(mode) |
+                       (rnw ? CMD_R0_RNW : 0);
+               xfer->cmd_desc[1] =
+                       CMD_R1_DATA_LENGTH(data_len);
+       }
+
+       return 0;
+}
+
+static void hci_cmd_v1_prep_i3c_xfer(struct i3c_hci *hci,
+                                    struct i3c_dev_desc *dev,
+                                    struct hci_xfer *xfer)
+{
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+       unsigned int dat_idx = dev_data->dat_idx;
+       enum hci_cmd_mode mode = get_i3c_mode(hci);
+       u8 *data = xfer->data;
+       unsigned int data_len = xfer->data_len;
+       bool rnw = xfer->rnw;
+
+       xfer->cmd_tid = hci_get_tid();
+
+       if (!rnw && data_len <= 4) {
+               /* we use an Immediate Data Transfer Command */
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_I |
+                       CMD_I0_TID(xfer->cmd_tid) |
+                       CMD_I0_DEV_INDEX(dat_idx) |
+                       CMD_I0_DTT(data_len) |
+                       CMD_I0_MODE(mode);
+               fill_data_bytes(xfer, data, data_len);
+       } else {
+               /* we use a Regular Data Transfer Command */
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_R |
+                       CMD_R0_TID(xfer->cmd_tid) |
+                       CMD_R0_DEV_INDEX(dat_idx) |
+                       CMD_R0_MODE(mode) |
+                       (rnw ? CMD_R0_RNW : 0);
+               xfer->cmd_desc[1] =
+                       CMD_R1_DATA_LENGTH(data_len);
+       }
+}
+
+static void hci_cmd_v1_prep_i2c_xfer(struct i3c_hci *hci,
+                                    struct i2c_dev_desc *dev,
+                                    struct hci_xfer *xfer)
+{
+       struct i3c_hci_dev_data *dev_data = i2c_dev_get_master_data(dev);
+       unsigned int dat_idx = dev_data->dat_idx;
+       enum hci_cmd_mode mode = get_i2c_mode(hci);
+       u8 *data = xfer->data;
+       unsigned int data_len = xfer->data_len;
+       bool rnw = xfer->rnw;
+
+       xfer->cmd_tid = hci_get_tid();
+
+       if (!rnw && data_len <= 4) {
+               /* we use an Immediate Data Transfer Command */
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_I |
+                       CMD_I0_TID(xfer->cmd_tid) |
+                       CMD_I0_DEV_INDEX(dat_idx) |
+                       CMD_I0_DTT(data_len) |
+                       CMD_I0_MODE(mode);
+               fill_data_bytes(xfer, data, data_len);
+       } else {
+               /* we use a Regular Data Transfer Command */
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_R |
+                       CMD_R0_TID(xfer->cmd_tid) |
+                       CMD_R0_DEV_INDEX(dat_idx) |
+                       CMD_R0_MODE(mode) |
+                       (rnw ? CMD_R0_RNW : 0);
+               xfer->cmd_desc[1] =
+                       CMD_R1_DATA_LENGTH(data_len);
+       }
+}
+
+static int hci_cmd_v1_daa(struct i3c_hci *hci)
+{
+       struct hci_xfer *xfer;
+       int ret, dat_idx = -1;
+       u8 next_addr = 0;
+       u64 pid;
+       unsigned int dcr, bcr;
+       DECLARE_COMPLETION_ONSTACK(done);
+
+       xfer = hci_alloc_xfer(2);
+       if (!xfer)
+               return -ENOMEM;
+
+       /*
+        * Simple for now: we allocate a temporary DAT entry, do a single
+        * DAA, register the device which will allocate its own DAT entry
+        * via the core callback, then free the temporary DAT entry.
+        * Loop until there are no more devices to assign an address to.
+        * Yes, there is room for improvement.
+        */
+       for (;;) {
+               ret = mipi_i3c_hci_dat_v1.alloc_entry(hci);
+               if (ret < 0)
+                       break;
+               dat_idx = ret;
+               ret = i3c_master_get_free_addr(&hci->master, next_addr);
+               if (ret < 0)
+                       break;
+               next_addr = ret;
+
+               DBG("next_addr = 0x%02x, DAA using DAT %d", next_addr, dat_idx);
+               mipi_i3c_hci_dat_v1.set_dynamic_addr(hci, dat_idx, next_addr);
+               mipi_i3c_hci_dct_index_reset(hci);
+
+               xfer->cmd_tid = hci_get_tid();
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_A |
+                       CMD_A0_TID(xfer->cmd_tid) |
+                       CMD_A0_CMD(I3C_CCC_ENTDAA) |
+                       CMD_A0_DEV_INDEX(dat_idx) |
+                       CMD_A0_DEV_COUNT(1) |
+                       CMD_A0_ROC | CMD_A0_TOC;
+               xfer->cmd_desc[1] = 0;
+               xfer->completion = &done;
+               hci->io->queue_xfer(hci, xfer, 1);
+               if (!wait_for_completion_timeout(&done, HZ) &&
+                   hci->io->dequeue_xfer(hci, xfer, 1)) {
+                       ret = -ETIME;
+                       break;
+               }
+               if (RESP_STATUS(xfer[0].response) == RESP_ERR_NACK &&
+                   RESP_DATA_LENGTH(xfer[0].response) == 1) {
+                       ret = 0;  /* no more devices to be assigned */
+                       break;
+               }
+               if (RESP_STATUS(xfer[0].response) != RESP_SUCCESS) {
+                       ret = -EIO;
+                       break;
+               }
+
+               i3c_hci_dct_get_val(hci, 0, &pid, &dcr, &bcr);
+               DBG("assigned address %#x to device PID=0x%llx DCR=%#x BCR=%#x",
+                   next_addr, pid, dcr, bcr);
+
+               mipi_i3c_hci_dat_v1.free_entry(hci, dat_idx);
+               dat_idx = -1;
+
+               /*
+                * TODO: Extend the subsystem layer to allow registering a
+                * new device and providing BCR/DCR/PID at the same time.
+                */
+               ret = i3c_master_add_i3c_dev_locked(&hci->master, next_addr);
+               if (ret)
+                       break;
+       }
+
+       if (dat_idx >= 0)
+               mipi_i3c_hci_dat_v1.free_entry(hci, dat_idx);
+       hci_free_xfer(xfer, 1);
+       return ret;
+}
+
+const struct hci_cmd_ops mipi_i3c_hci_cmd_v1 = {
+       .prep_ccc               = hci_cmd_v1_prep_ccc,
+       .prep_i3c_xfer          = hci_cmd_v1_prep_i3c_xfer,
+       .prep_i2c_xfer          = hci_cmd_v1_prep_i2c_xfer,
+       .perform_daa            = hci_cmd_v1_daa,
+};
diff --git a/drivers/i3c/master/mipi-i3c-hci/cmd_v2.c b/drivers/i3c/master/mipi-i3c-hci/cmd_v2.c
new file mode 100644 (file)
index 0000000..4493b2b
--- /dev/null
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * I3C HCI v2.0 Command Descriptor Handling
+ *
+ * Note: The I3C HCI v2.0 spec is still in flux. The code here will change.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/i3c/master.h>
+
+#include "hci.h"
+#include "cmd.h"
+#include "xfer_mode_rate.h"
+
+
+/*
+ * Unified Data Transfer Command
+ */
+
+#define CMD_0_ATTR_U                   FIELD_PREP(CMD_0_ATTR, 0x4)
+
+#define CMD_U3_HDR_TSP_ML_CTRL(v)      FIELD_PREP(W3_MASK(107, 104), v)
+#define CMD_U3_IDB4(v)                 FIELD_PREP(W3_MASK(103,  96), v)
+#define CMD_U3_HDR_CMD(v)              FIELD_PREP(W3_MASK(103,  96), v)
+#define CMD_U2_IDB3(v)                 FIELD_PREP(W2_MASK( 95,  88), v)
+#define CMD_U2_HDR_BT(v)               FIELD_PREP(W2_MASK( 95,  88), v)
+#define CMD_U2_IDB2(v)                 FIELD_PREP(W2_MASK( 87,  80), v)
+#define CMD_U2_BT_CMD2(v)              FIELD_PREP(W2_MASK( 87,  80), v)
+#define CMD_U2_IDB1(v)                 FIELD_PREP(W2_MASK( 79,  72), v)
+#define CMD_U2_BT_CMD1(v)              FIELD_PREP(W2_MASK( 79,  72), v)
+#define CMD_U2_IDB0(v)                 FIELD_PREP(W2_MASK( 71,  64), v)
+#define CMD_U2_BT_CMD0(v)              FIELD_PREP(W2_MASK( 71,  64), v)
+#define CMD_U1_ERR_HANDLING(v)         FIELD_PREP(W1_MASK( 63,  62), v)
+#define CMD_U1_ADD_FUNC(v)             FIELD_PREP(W1_MASK( 61,  56), v)
+#define CMD_U1_COMBO_XFER                         W1_BIT_( 55)
+#define CMD_U1_DATA_LENGTH(v)          FIELD_PREP(W1_MASK( 53,  32), v)
+#define CMD_U0_TOC                                W0_BIT_( 31)
+#define CMD_U0_ROC                                W0_BIT_( 30)
+#define CMD_U0_MAY_YIELD                          W0_BIT_( 29)
+#define CMD_U0_NACK_RCNT(v)            FIELD_PREP(W0_MASK( 28,  27), v)
+#define CMD_U0_IDB_COUNT(v)            FIELD_PREP(W0_MASK( 26,  24), v)
+#define CMD_U0_MODE_INDEX(v)           FIELD_PREP(W0_MASK( 22,  18), v)
+#define CMD_U0_XFER_RATE(v)            FIELD_PREP(W0_MASK( 17,  15), v)
+#define CMD_U0_DEV_ADDRESS(v)          FIELD_PREP(W0_MASK( 14,   8), v)
+#define CMD_U0_RnW                                W0_BIT_(  7)
+#define CMD_U0_TID(v)                  FIELD_PREP(W0_MASK(  6,   3), v)
+
+/*
+ * Address Assignment Command
+ */
+
+#define CMD_0_ATTR_A                   FIELD_PREP(CMD_0_ATTR, 0x2)
+
+#define CMD_A1_DATA_LENGTH(v)          FIELD_PREP(W1_MASK( 53,  32), v)
+#define CMD_A0_TOC                                W0_BIT_( 31)
+#define CMD_A0_ROC                                W0_BIT_( 30)
+#define CMD_A0_XFER_RATE(v)            FIELD_PREP(W0_MASK( 17,  15), v)
+#define CMD_A0_ASSIGN_ADDRESS(v)       FIELD_PREP(W0_MASK( 14,   8), v)
+#define CMD_A0_TID(v)                  FIELD_PREP(W0_MASK(  6,   3), v)
+
+
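+/*
+ * Map the programmed I3C SCL rate to the fastest standard SDR transfer
+ * rate index not exceeding it; at 2 MHz or below, fall back to the
+ * Fm/Fm+ compatible rate.
+ */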
+static unsigned int get_i3c_rate_idx(struct i3c_hci *hci)
+{
+       struct i3c_bus *bus = i3c_master_get_bus(&hci->master);
+
+       if (bus->scl_rate.i3c >= 12000000)
+               return XFERRATE_I3C_SDR0;
+       if (bus->scl_rate.i3c > 8000000)
+               return XFERRATE_I3C_SDR1;
+       if (bus->scl_rate.i3c > 6000000)
+               return XFERRATE_I3C_SDR2;
+       if (bus->scl_rate.i3c > 4000000)
+               return XFERRATE_I3C_SDR3;
+       if (bus->scl_rate.i3c > 2000000)
+               return XFERRATE_I3C_SDR4;
+       return XFERRATE_I3C_SDR_FM_FMP;
+}
+
+static unsigned int get_i2c_rate_idx(struct i3c_hci *hci)
+{
+       struct i3c_bus *bus = i3c_master_get_bus(&hci->master);
+
+       if (bus->scl_rate.i2c >= 1000000)
+               return XFERRATE_I2C_FMP;
+       return XFERRATE_I2C_FM;
+}
+
+static void hci_cmd_v2_prep_private_xfer(struct i3c_hci *hci,
+                                        struct hci_xfer *xfer,
+                                        u8 addr, unsigned int mode,
+                                        unsigned int rate)
+{
+       u8 *data = xfer->data;
+       unsigned int data_len = xfer->data_len;
+       bool rnw = xfer->rnw;
+
+       xfer->cmd_tid = hci_get_tid();
+
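+       /* writes of up to 5 bytes fit in the descriptor's immediate data bytes */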
+       if (!rnw && data_len <= 5) {
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_U |
+                       CMD_U0_TID(xfer->cmd_tid) |
+                       CMD_U0_DEV_ADDRESS(addr) |
+                       CMD_U0_XFER_RATE(rate) |
+                       CMD_U0_MODE_INDEX(mode) |
+                       CMD_U0_IDB_COUNT(data_len);
+               xfer->cmd_desc[1] =
+                       CMD_U1_DATA_LENGTH(0);
+               xfer->cmd_desc[2] = 0;
+               xfer->cmd_desc[3] = 0;
+               switch (data_len) {
+               case 5:
+                       xfer->cmd_desc[3] |= CMD_U3_IDB4(data[4]);
+                       fallthrough;
+               case 4:
+                       xfer->cmd_desc[2] |= CMD_U2_IDB3(data[3]);
+                       fallthrough;
+               case 3:
+                       xfer->cmd_desc[2] |= CMD_U2_IDB2(data[2]);
+                       fallthrough;
+               case 2:
+                       xfer->cmd_desc[2] |= CMD_U2_IDB1(data[1]);
+                       fallthrough;
+               case 1:
+                       xfer->cmd_desc[2] |= CMD_U2_IDB0(data[0]);
+                       fallthrough;
+               case 0:
+                       break;
+               }
+               /* we consumed all the data with the cmd descriptor */
+               xfer->data = NULL;
+       } else {
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_U |
+                       CMD_U0_TID(xfer->cmd_tid) |
+                       (rnw ? CMD_U0_RnW : 0) |
+                       CMD_U0_DEV_ADDRESS(addr) |
+                       CMD_U0_XFER_RATE(rate) |
+                       CMD_U0_MODE_INDEX(mode);
+               xfer->cmd_desc[1] =
+                       CMD_U1_DATA_LENGTH(data_len);
+               xfer->cmd_desc[2] = 0;
+               xfer->cmd_desc[3] = 0;
+       }
+}
+
+static int hci_cmd_v2_prep_ccc(struct i3c_hci *hci, struct hci_xfer *xfer,
+                              u8 ccc_addr, u8 ccc_cmd, bool raw)
+{
+       unsigned int mode = XFERMODE_IDX_I3C_SDR;
+       unsigned int rate = get_i3c_rate_idx(hci);
+       u8 *data = xfer->data;
+       unsigned int data_len = xfer->data_len;
+       bool rnw = xfer->rnw;
+
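+       /*
+        * In raw mode a directed CCC becomes a plain private transfer;
+        * the CCC byte itself travels in the broadcast prefix queued by
+        * i3c_hci_send_ccc_cmd().
+        */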
+       if (raw && ccc_addr != I3C_BROADCAST_ADDR) {
+               hci_cmd_v2_prep_private_xfer(hci, xfer, ccc_addr, mode, rate);
+               return 0;
+       }
+
+       xfer->cmd_tid = hci_get_tid();
+
+       if (!rnw && data_len <= 4) {
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_U |
+                       CMD_U0_TID(xfer->cmd_tid) |
+                       CMD_U0_DEV_ADDRESS(ccc_addr) |
+                       CMD_U0_XFER_RATE(rate) |
+                       CMD_U0_MODE_INDEX(mode) |
+                       CMD_U0_IDB_COUNT(data_len + (!raw ? 0 : 1));
+               xfer->cmd_desc[1] =
+                       CMD_U1_DATA_LENGTH(0);
+               xfer->cmd_desc[2] =
+                       CMD_U2_IDB0(ccc_cmd);
+               xfer->cmd_desc[3] = 0;
+               switch (data_len) {
+               case 4:
+                       xfer->cmd_desc[3] |= CMD_U3_IDB4(data[3]);
+                       fallthrough;
+               case 3:
+                       xfer->cmd_desc[2] |= CMD_U2_IDB3(data[2]);
+                       fallthrough;
+               case 2:
+                       xfer->cmd_desc[2] |= CMD_U2_IDB2(data[1]);
+                       fallthrough;
+               case 1:
+                       xfer->cmd_desc[2] |= CMD_U2_IDB1(data[0]);
+                       fallthrough;
+               case 0:
+                       break;
+               }
+               /* we consumed all the data with the cmd descriptor */
+               xfer->data = NULL;
+       } else {
+               xfer->cmd_desc[0] =
+                       CMD_0_ATTR_U |
+                       CMD_U0_TID(xfer->cmd_tid) |
+                       (rnw ? CMD_U0_RnW : 0) |
+                       CMD_U0_DEV_ADDRESS(ccc_addr) |
+                       CMD_U0_XFER_RATE(rate) |
+                       CMD_U0_MODE_INDEX(mode) |
+                       CMD_U0_IDB_COUNT(!raw ? 0 : 1);
+               xfer->cmd_desc[1] =
+                       CMD_U1_DATA_LENGTH(data_len);
+               xfer->cmd_desc[2] =
+                       CMD_U2_IDB0(ccc_cmd);
+               xfer->cmd_desc[3] = 0;
+       }
+
+       return 0;
+}
+
+static void hci_cmd_v2_prep_i3c_xfer(struct i3c_hci *hci,
+                                    struct i3c_dev_desc *dev,
+                                    struct hci_xfer *xfer)
+{
+       unsigned int mode = XFERMODE_IDX_I3C_SDR;
+       unsigned int rate = get_i3c_rate_idx(hci);
+       u8 addr = dev->info.dyn_addr;
+
+       hci_cmd_v2_prep_private_xfer(hci, xfer, addr, mode, rate);
+}
+
+static void hci_cmd_v2_prep_i2c_xfer(struct i3c_hci *hci,
+                                    struct i2c_dev_desc *dev,
+                                    struct hci_xfer *xfer)
+{
+       unsigned int mode = XFERMODE_IDX_I2C;
+       unsigned int rate = get_i2c_rate_idx(hci);
+       u8 addr = dev->addr;
+
+       hci_cmd_v2_prep_private_xfer(hci, xfer, addr, mode, rate);
+}
+
+static int hci_cmd_v2_daa(struct i3c_hci *hci)
+{
+       struct hci_xfer *xfer;
+       int ret;
+       u8 next_addr = 0;
+       u32 device_id[2];
+       u64 pid;
+       unsigned int dcr, bcr;
+       DECLARE_COMPLETION_ONSTACK(done);
+
+       xfer = hci_alloc_xfer(2);
+       if (!xfer)
+               return -ENOMEM;
+
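+       /*
+        * Queue Address Assignment commands in pairs: xfer[0] retrieves the
+        * 8 device characteristic bytes (PID/BCR/DCR) while xfer[1] assigns
+        * next_addr to the responding device.
+        */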
+       xfer[0].data = &device_id;
+       xfer[0].data_len = 8;
+       xfer[0].rnw = true;
+       xfer[0].cmd_desc[1] = CMD_A1_DATA_LENGTH(8);
+       xfer[1].completion = &done;
+
+       for (;;) {
+               ret = i3c_master_get_free_addr(&hci->master, next_addr);
+               if (ret < 0)
+                       break;
+               next_addr = ret;
+               DBG("next_addr = 0x%02x", next_addr);
+               xfer[0].cmd_tid = hci_get_tid();
+               xfer[0].cmd_desc[0] =
+                       CMD_0_ATTR_A |
+                       CMD_A0_TID(xfer[0].cmd_tid) |
+                       CMD_A0_ROC;
+               xfer[1].cmd_tid = hci_get_tid();
+               xfer[1].cmd_desc[0] =
+                       CMD_0_ATTR_A |
+                       CMD_A0_TID(xfer[1].cmd_tid) |
+                       CMD_A0_ASSIGN_ADDRESS(next_addr) |
+                       CMD_A0_ROC |
+                       CMD_A0_TOC;
+               hci->io->queue_xfer(hci, xfer, 2);
+               if (!wait_for_completion_timeout(&done, HZ) &&
+                   hci->io->dequeue_xfer(hci, xfer, 2)) {
+                       ret = -ETIME;
+                       break;
+               }
+               if (RESP_STATUS(xfer[0].response) != RESP_SUCCESS) {
+                       ret = 0;  /* no more devices to be assigned */
+                       break;
+               }
+               if (RESP_STATUS(xfer[1].response) != RESP_SUCCESS) {
+                       ret = -EIO;
+                       break;
+               }
+
+               pid = FIELD_GET(W1_MASK(47, 32), device_id[1]);
+               pid = (pid << 32) | device_id[0];
+               bcr = FIELD_GET(W1_MASK(55, 48), device_id[1]);
+               dcr = FIELD_GET(W1_MASK(63, 56), device_id[1]);
+               DBG("assigned address %#x to device PID=0x%llx DCR=%#x BCR=%#x",
+                   next_addr, pid, dcr, bcr);
+               /*
+                * TODO: Extend the subsystem layer to allow registering a
+                * new device and providing BCR/DCR/PID at the same time.
+                */
+               ret = i3c_master_add_i3c_dev_locked(&hci->master, next_addr);
+               if (ret)
+                       break;
+       }
+
+       hci_free_xfer(xfer, 2);
+       return ret;
+}
+
+const struct hci_cmd_ops mipi_i3c_hci_cmd_v2 = {
+       .prep_ccc               = hci_cmd_v2_prep_ccc,
+       .prep_i3c_xfer          = hci_cmd_v2_prep_i3c_xfer,
+       .prep_i2c_xfer          = hci_cmd_v2_prep_i2c_xfer,
+       .perform_daa            = hci_cmd_v2_daa,
+};
diff --git a/drivers/i3c/master/mipi-i3c-hci/core.c b/drivers/i3c/master/mipi-i3c-hci/core.c
new file mode 100644 (file)
index 0000000..500abd2
--- /dev/null
@@ -0,0 +1,798 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * Core driver code with main interface to the I3C subsystem.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/i3c/master.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include "hci.h"
+#include "ext_caps.h"
+#include "cmd.h"
+#include "dat.h"
+
+
+/*
+ * Host Controller Capabilities and Operation Registers
+ */
+
+#define reg_read(r)            readl(hci->base_regs + (r))
+#define reg_write(r, v)                writel(v, hci->base_regs + (r))
+#define reg_set(r, v)          reg_write(r, reg_read(r) | (v))
+#define reg_clear(r, v)                reg_write(r, reg_read(r) & ~(v))
+
+#define HCI_VERSION                    0x00    /* HCI Version (in BCD) */
+
+#define HC_CONTROL                     0x04
+#define HC_CONTROL_BUS_ENABLE          BIT(31)
+#define HC_CONTROL_RESUME              BIT(30)
+#define HC_CONTROL_ABORT               BIT(29)
+#define HC_CONTROL_HALT_ON_CMD_TIMEOUT BIT(12)
+#define HC_CONTROL_HOT_JOIN_CTRL       BIT(8)  /* Hot-Join ACK/NACK Control */
+#define HC_CONTROL_I2C_TARGET_PRESENT  BIT(7)
+#define HC_CONTROL_PIO_MODE            BIT(6)  /* DMA/PIO Mode Selector */
+#define HC_CONTROL_DATA_BIG_ENDIAN     BIT(4)
+#define HC_CONTROL_IBA_INCLUDE         BIT(0)  /* Include I3C Broadcast Address */
+
+#define MASTER_DEVICE_ADDR             0x08    /* Master Device Address */
+#define MASTER_DYNAMIC_ADDR_VALID      BIT(31) /* Dynamic Address is Valid */
+#define MASTER_DYNAMIC_ADDR(v)         FIELD_PREP(GENMASK(22, 16), v)
+
+#define HC_CAPABILITIES                        0x0c
+#define HC_CAP_SG_DC_EN                        BIT(30)
+#define HC_CAP_SG_IBI_EN               BIT(29)
+#define HC_CAP_SG_CR_EN                        BIT(28)
+#define HC_CAP_MAX_DATA_LENGTH         GENMASK(24, 22)
+#define HC_CAP_CMD_SIZE                        GENMASK(21, 20)
+#define HC_CAP_DIRECT_COMMANDS_EN      BIT(18)
+#define HC_CAP_MULTI_LANE_EN           BIT(15)
+#define HC_CAP_CMD_CCC_DEFBYTE         BIT(10)
+#define HC_CAP_HDR_BT_EN               BIT(8)
+#define HC_CAP_HDR_TS_EN               BIT(7)
+#define HC_CAP_HDR_DDR_EN              BIT(6)
+#define HC_CAP_NON_CURRENT_MASTER_CAP  BIT(5)  /* master handoff capable */
+#define HC_CAP_DATA_BYTE_CFG_EN                BIT(4)  /* endian selection possible */
+#define HC_CAP_AUTO_COMMAND            BIT(3)
+#define HC_CAP_COMBO_COMMAND           BIT(2)
+
+#define RESET_CONTROL                  0x10
+#define BUS_RESET                      BIT(31)
+#define BUS_RESET_TYPE                 GENMASK(30, 29)
+#define IBI_QUEUE_RST                  BIT(5)
+#define RX_FIFO_RST                    BIT(4)
+#define TX_FIFO_RST                    BIT(3)
+#define RESP_QUEUE_RST                 BIT(2)
+#define CMD_QUEUE_RST                  BIT(1)
+#define SOFT_RST                       BIT(0)  /* Core Reset */
+
+#define PRESENT_STATE                  0x14
+#define STATE_CURRENT_MASTER           BIT(2)
+
+#define INTR_STATUS                    0x20
+#define INTR_STATUS_ENABLE             0x24
+#define INTR_SIGNAL_ENABLE             0x28
+#define INTR_FORCE                     0x2c
+#define INTR_HC_CMD_SEQ_UFLOW_STAT     BIT(12) /* Cmd Sequence Underflow */
+#define INTR_HC_RESET_CANCEL           BIT(11) /* HC Cancelled Reset */
+#define INTR_HC_INTERNAL_ERR           BIT(10) /* HC Internal Error */
+#define INTR_HC_PIO                    BIT(8)  /* cascaded PIO interrupt */
+#define INTR_HC_RINGS                  GENMASK(7, 0)
+
+#define DAT_SECTION                    0x30    /* Device Address Table */
+#define DAT_ENTRY_SIZE                 GENMASK(31, 28)
+#define DAT_TABLE_SIZE                 GENMASK(18, 12)
+#define DAT_TABLE_OFFSET               GENMASK(11, 0)
+
+#define DCT_SECTION                    0x34    /* Device Characteristics Table */
+#define DCT_ENTRY_SIZE                 GENMASK(31, 28)
+#define DCT_TABLE_INDEX                        GENMASK(23, 19)
+#define DCT_TABLE_SIZE                 GENMASK(18, 12)
+#define DCT_TABLE_OFFSET               GENMASK(11, 0)
+
+#define RING_HEADERS_SECTION           0x38
+#define RING_HEADERS_OFFSET            GENMASK(15, 0)
+
+#define PIO_SECTION                    0x3c
+#define PIO_REGS_OFFSET                        GENMASK(15, 0)  /* PIO Offset */
+
+#define EXT_CAPS_SECTION               0x40
+#define EXT_CAPS_OFFSET                        GENMASK(15, 0)
+
+#define IBI_NOTIFY_CTRL                        0x58    /* IBI Notify Control */
+#define IBI_NOTIFY_SIR_REJECTED                BIT(3)  /* Rejected Target Interrupt Request */
+#define IBI_NOTIFY_MR_REJECTED         BIT(1)  /* Rejected Master Request Control */
+#define IBI_NOTIFY_HJ_REJECTED         BIT(0)  /* Rejected Hot-Join Control */
+
+#define DEV_CTX_BASE_LO                        0x60
+#define DEV_CTX_BASE_HI                        0x64
+
+
+static inline struct i3c_hci *to_i3c_hci(struct i3c_master_controller *m)
+{
+       return container_of(m, struct i3c_hci, master);
+}
+
+static int i3c_hci_bus_init(struct i3c_master_controller *m)
+{
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct i3c_device_info info;
+       int ret;
+
+       DBG("");
+
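+       /* only the v1 command model addresses devices through DAT entries */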
+       if (hci->cmd == &mipi_i3c_hci_cmd_v1) {
+               ret = mipi_i3c_hci_dat_v1.init(hci);
+               if (ret)
+                       return ret;
+       }
+
+       ret = i3c_master_get_free_addr(m, 0);
+       if (ret < 0)
+               return ret;
+       reg_write(MASTER_DEVICE_ADDR,
+                 MASTER_DYNAMIC_ADDR(ret) | MASTER_DYNAMIC_ADDR_VALID);
+       memset(&info, 0, sizeof(info));
+       info.dyn_addr = ret;
+       ret = i3c_master_set_info(m, &info);
+       if (ret)
+               return ret;
+
+       ret = hci->io->init(hci);
+       if (ret)
+               return ret;
+
+       reg_set(HC_CONTROL, HC_CONTROL_BUS_ENABLE);
+       DBG("HC_CONTROL = %#x", reg_read(HC_CONTROL));
+
+       return 0;
+}
+
+static void i3c_hci_bus_cleanup(struct i3c_master_controller *m)
+{
+       struct i3c_hci *hci = to_i3c_hci(m);
+
+       DBG("");
+
+       reg_clear(HC_CONTROL, HC_CONTROL_BUS_ENABLE);
+       hci->io->cleanup(hci);
+       if (hci->cmd == &mipi_i3c_hci_cmd_v1)
+               mipi_i3c_hci_dat_v1.cleanup(hci);
+}
+
+void mipi_i3c_hci_resume(struct i3c_hci *hci)
+{
+       /* the HC_CONTROL_RESUME bit is R/W1C so just read and write back */
+       reg_write(HC_CONTROL, reg_read(HC_CONTROL));
+}
+
+/* located here rather than pio.c because needed bits are in core reg space */
+void mipi_i3c_hci_pio_reset(struct i3c_hci *hci)
+{
+       reg_write(RESET_CONTROL, RX_FIFO_RST | TX_FIFO_RST | RESP_QUEUE_RST);
+}
+
+/* located here rather than dct.c because needed bits are in core reg space */
+void mipi_i3c_hci_dct_index_reset(struct i3c_hci *hci)
+{
+       reg_write(DCT_SECTION, FIELD_PREP(DCT_TABLE_INDEX, 0));
+}
+
+static int i3c_hci_send_ccc_cmd(struct i3c_master_controller *m,
+                               struct i3c_ccc_cmd *ccc)
+{
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct hci_xfer *xfer;
+       bool raw = !!(hci->quirks & HCI_QUIRK_RAW_CCC);
+       bool prefixed = raw && !!(ccc->id & I3C_CCC_DIRECT);
+       unsigned int nxfers = ccc->ndests + prefixed;
+       DECLARE_COMPLETION_ONSTACK(done);
+       int i, last, ret = 0;
+
+       DBG("cmd=%#x rnw=%d ndests=%d data[0].len=%d",
+           ccc->id, ccc->rnw, ccc->ndests, ccc->dests[0].payload.len);
+
+       xfer = hci_alloc_xfer(nxfers);
+       if (!xfer)
+               return -ENOMEM;
+
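+       /*
+        * With the RAW_CCC quirk, a direct CCC must be preceded by an
+        * explicit broadcast transfer carrying the CCC command byte itself.
+        */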
+       if (prefixed) {
+               xfer->data = NULL;
+               xfer->data_len = 0;
+               xfer->rnw = false;
+               hci->cmd->prep_ccc(hci, xfer, I3C_BROADCAST_ADDR,
+                                  ccc->id, true);
+               xfer++;
+       }
+
+       for (i = 0; i < nxfers - prefixed; i++) {
+               xfer[i].data = ccc->dests[i].payload.data;
+               xfer[i].data_len = ccc->dests[i].payload.len;
+               xfer[i].rnw = ccc->rnw;
+               ret = hci->cmd->prep_ccc(hci, &xfer[i], ccc->dests[i].addr,
+                                        ccc->id, raw);
+               if (ret)
+                       goto out;
+               xfer[i].cmd_desc[0] |= CMD_0_ROC;
+       }
+       last = i - 1;
+       xfer[last].cmd_desc[0] |= CMD_0_TOC;
+       xfer[last].completion = &done;
+
+       if (prefixed)
+               xfer--;
+
+       ret = hci->io->queue_xfer(hci, xfer, nxfers);
+       if (ret)
+               goto out;
+       if (!wait_for_completion_timeout(&done, HZ) &&
+           hci->io->dequeue_xfer(hci, xfer, nxfers)) {
+               ret = -ETIME;
+               goto out;
+       }
+       for (i = prefixed; i < nxfers; i++) {
+               if (ccc->rnw)
+                       ccc->dests[i - prefixed].payload.len =
+                               RESP_DATA_LENGTH(xfer[i].response);
+               if (RESP_STATUS(xfer[i].response) != RESP_SUCCESS) {
+                       ret = -EIO;
+                       goto out;
+               }
+       }
+
+       if (ccc->rnw)
+               DBG("got: %*ph",
+                   ccc->dests[0].payload.len, ccc->dests[0].payload.data);
+
+out:
+       hci_free_xfer(xfer, nxfers);
+       return ret;
+}
+
+static int i3c_hci_daa(struct i3c_master_controller *m)
+{
+       struct i3c_hci *hci = to_i3c_hci(m);
+
+       DBG("");
+
+       return hci->cmd->perform_daa(hci);
+}
+
+static int i3c_hci_priv_xfers(struct i3c_dev_desc *dev,
+                             struct i3c_priv_xfer *i3c_xfers,
+                             int nxfers)
+{
+       struct i3c_master_controller *m = i3c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct hci_xfer *xfer;
+       DECLARE_COMPLETION_ONSTACK(done);
+       unsigned int size_limit;
+       int i, last, ret = 0;
+
+       DBG("nxfers = %d", nxfers);
+
+       xfer = hci_alloc_xfer(nxfers);
+       if (!xfer)
+               return -ENOMEM;
+
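+       /* HC_CAP_MAX_DATA_LENGTH encodes the limit as a power of 2, from 64 KiB up */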
+       size_limit = 1U << (16 + FIELD_GET(HC_CAP_MAX_DATA_LENGTH, hci->caps));
+
+       for (i = 0; i < nxfers; i++) {
+               xfer[i].data_len = i3c_xfers[i].len;
+               ret = -EFBIG;
+               if (xfer[i].data_len >= size_limit)
+                       goto out;
+               xfer[i].rnw = i3c_xfers[i].rnw;
+               if (i3c_xfers[i].rnw) {
+                       xfer[i].data = i3c_xfers[i].data.in;
+               } else {
+                       /* silence the const qualifier warning with a cast */
+                       xfer[i].data = (void *) i3c_xfers[i].data.out;
+               }
+               hci->cmd->prep_i3c_xfer(hci, dev, &xfer[i]);
+               xfer[i].cmd_desc[0] |= CMD_0_ROC;
+       }
+       last = i - 1;
+       xfer[last].cmd_desc[0] |= CMD_0_TOC;
+       xfer[last].completion = &done;
+
+       ret = hci->io->queue_xfer(hci, xfer, nxfers);
+       if (ret)
+               goto out;
+       if (!wait_for_completion_timeout(&done, HZ) &&
+           hci->io->dequeue_xfer(hci, xfer, nxfers)) {
+               ret = -ETIME;
+               goto out;
+       }
+       for (i = 0; i < nxfers; i++) {
+               if (i3c_xfers[i].rnw)
+                       i3c_xfers[i].len = RESP_DATA_LENGTH(xfer[i].response);
+               if (RESP_STATUS(xfer[i].response) != RESP_SUCCESS) {
+                       ret = -EIO;
+                       goto out;
+               }
+       }
+
+out:
+       hci_free_xfer(xfer, nxfers);
+       return ret;
+}
+
+static int i3c_hci_i2c_xfers(struct i2c_dev_desc *dev,
+                            const struct i2c_msg *i2c_xfers, int nxfers)
+{
+       struct i3c_master_controller *m = i2c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct hci_xfer *xfer;
+       DECLARE_COMPLETION_ONSTACK(done);
+       int i, last, ret = 0;
+
+       DBG("nxfers = %d", nxfers);
+
+       xfer = hci_alloc_xfer(nxfers);
+       if (!xfer)
+               return -ENOMEM;
+
+       for (i = 0; i < nxfers; i++) {
+               xfer[i].data = i2c_xfers[i].buf;
+               xfer[i].data_len = i2c_xfers[i].len;
+               xfer[i].rnw = i2c_xfers[i].flags & I2C_M_RD;
+               hci->cmd->prep_i2c_xfer(hci, dev, &xfer[i]);
+               xfer[i].cmd_desc[0] |= CMD_0_ROC;
+       }
+       last = i - 1;
+       xfer[last].cmd_desc[0] |= CMD_0_TOC;
+       xfer[last].completion = &done;
+
+       ret = hci->io->queue_xfer(hci, xfer, nxfers);
+       if (ret)
+               goto out;
+       if (!wait_for_completion_timeout(&done, HZ) &&
+           hci->io->dequeue_xfer(hci, xfer, nxfers)) {
+               ret = -ETIME;
+               goto out;
+       }
+       for (i = 0; i < nxfers; i++) {
+               if (RESP_STATUS(xfer[i].response) != RESP_SUCCESS) {
+                       ret = -EIO;
+                       goto out;
+               }
+       }
+
+out:
+       hci_free_xfer(xfer, nxfers);
+       return ret;
+}
+
+static int i3c_hci_attach_i3c_dev(struct i3c_dev_desc *dev)
+{
+       struct i3c_master_controller *m = i3c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct i3c_hci_dev_data *dev_data;
+       int ret;
+
+       DBG("");
+
+       dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
+       if (!dev_data)
+               return -ENOMEM;
+       if (hci->cmd == &mipi_i3c_hci_cmd_v1) {
+               ret = mipi_i3c_hci_dat_v1.alloc_entry(hci);
+               if (ret < 0) {
+                       kfree(dev_data);
+                       return ret;
+               }
+               mipi_i3c_hci_dat_v1.set_dynamic_addr(hci, ret, dev->info.dyn_addr);
+               dev_data->dat_idx = ret;
+       }
+       i3c_dev_set_master_data(dev, dev_data);
+       return 0;
+}
+
+static int i3c_hci_reattach_i3c_dev(struct i3c_dev_desc *dev, u8 old_dyn_addr)
+{
+       struct i3c_master_controller *m = i3c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+
+       DBG("");
+
+       if (hci->cmd == &mipi_i3c_hci_cmd_v1)
+               mipi_i3c_hci_dat_v1.set_dynamic_addr(hci, dev_data->dat_idx,
+                                            dev->info.dyn_addr);
+       return 0;
+}
+
+static void i3c_hci_detach_i3c_dev(struct i3c_dev_desc *dev)
+{
+       struct i3c_master_controller *m = i3c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+
+       DBG("");
+
+       i3c_dev_set_master_data(dev, NULL);
+       if (hci->cmd == &mipi_i3c_hci_cmd_v1)
+               mipi_i3c_hci_dat_v1.free_entry(hci, dev_data->dat_idx);
+       kfree(dev_data);
+}
+
+static int i3c_hci_attach_i2c_dev(struct i2c_dev_desc *dev)
+{
+       struct i3c_master_controller *m = i2c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct i3c_hci_dev_data *dev_data;
+       int ret;
+
+       DBG("");
+
+       if (hci->cmd != &mipi_i3c_hci_cmd_v1)
+               return 0;
+       dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
+       if (!dev_data)
+               return -ENOMEM;
+       ret = mipi_i3c_hci_dat_v1.alloc_entry(hci);
+       if (ret < 0) {
+               kfree(dev_data);
+               return ret;
+       }
+       mipi_i3c_hci_dat_v1.set_static_addr(hci, ret, dev->addr);
+       mipi_i3c_hci_dat_v1.set_flags(hci, ret, DAT_0_I2C_DEVICE, 0);
+       dev_data->dat_idx = ret;
+       i2c_dev_set_master_data(dev, dev_data);
+       return 0;
+}
+
+static void i3c_hci_detach_i2c_dev(struct i2c_dev_desc *dev)
+{
+       struct i3c_master_controller *m = i2c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct i3c_hci_dev_data *dev_data = i2c_dev_get_master_data(dev);
+
+       DBG("");
+
+       if (dev_data) {
+               i2c_dev_set_master_data(dev, NULL);
+               if (hci->cmd == &mipi_i3c_hci_cmd_v1)
+                       mipi_i3c_hci_dat_v1.free_entry(hci, dev_data->dat_idx);
+               kfree(dev_data);
+       }
+}
+
+static int i3c_hci_request_ibi(struct i3c_dev_desc *dev,
+                              const struct i3c_ibi_setup *req)
+{
+       struct i3c_master_controller *m = i3c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+       unsigned int dat_idx = dev_data->dat_idx;
+
+       if (req->max_payload_len != 0)
+               mipi_i3c_hci_dat_v1.set_flags(hci, dat_idx, DAT_0_IBI_PAYLOAD, 0);
+       else
+               mipi_i3c_hci_dat_v1.clear_flags(hci, dat_idx, DAT_0_IBI_PAYLOAD, 0);
+       return hci->io->request_ibi(hci, dev, req);
+}
+
+static void i3c_hci_free_ibi(struct i3c_dev_desc *dev)
+{
+       struct i3c_master_controller *m = i3c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+
+       hci->io->free_ibi(hci, dev);
+}
+
+static int i3c_hci_enable_ibi(struct i3c_dev_desc *dev)
+{
+       struct i3c_master_controller *m = i3c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+
+       mipi_i3c_hci_dat_v1.clear_flags(hci, dev_data->dat_idx, DAT_0_SIR_REJECT, 0);
+       return i3c_master_enec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR);
+}
+
+static int i3c_hci_disable_ibi(struct i3c_dev_desc *dev)
+{
+       struct i3c_master_controller *m = i3c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+
+       mipi_i3c_hci_dat_v1.set_flags(hci, dev_data->dat_idx, DAT_0_SIR_REJECT, 0);
+       return i3c_master_disec_locked(m, dev->info.dyn_addr, I3C_CCC_EVENT_SIR);
+}
+
+static void i3c_hci_recycle_ibi_slot(struct i3c_dev_desc *dev,
+                                    struct i3c_ibi_slot *slot)
+{
+       struct i3c_master_controller *m = i3c_dev_get_master(dev);
+       struct i3c_hci *hci = to_i3c_hci(m);
+
+       hci->io->recycle_ibi_slot(hci, dev, slot);
+}
+
+static const struct i3c_master_controller_ops i3c_hci_ops = {
+       .bus_init               = i3c_hci_bus_init,
+       .bus_cleanup            = i3c_hci_bus_cleanup,
+       .do_daa                 = i3c_hci_daa,
+       .send_ccc_cmd           = i3c_hci_send_ccc_cmd,
+       .priv_xfers             = i3c_hci_priv_xfers,
+       .i2c_xfers              = i3c_hci_i2c_xfers,
+       .attach_i3c_dev         = i3c_hci_attach_i3c_dev,
+       .reattach_i3c_dev       = i3c_hci_reattach_i3c_dev,
+       .detach_i3c_dev         = i3c_hci_detach_i3c_dev,
+       .attach_i2c_dev         = i3c_hci_attach_i2c_dev,
+       .detach_i2c_dev         = i3c_hci_detach_i2c_dev,
+       .request_ibi            = i3c_hci_request_ibi,
+       .free_ibi               = i3c_hci_free_ibi,
+       .enable_ibi             = i3c_hci_enable_ibi,
+       .disable_ibi            = i3c_hci_disable_ibi,
+       .recycle_ibi_slot       = i3c_hci_recycle_ibi_slot,
+};
+
+static irqreturn_t i3c_hci_irq_handler(int irq, void *dev_id)
+{
+       struct i3c_hci *hci = dev_id;
+       irqreturn_t result = IRQ_NONE;
+       u32 val;
+
+       val = reg_read(INTR_STATUS);
+       DBG("INTR_STATUS = %#x", val);
+
+       if (val) {
+               reg_write(INTR_STATUS, val);
+       } else {
+               /* v1.0 does not have PIO cascaded notification bits */
+               val |= INTR_HC_PIO;
+       }
+
+       if (val & INTR_HC_RESET_CANCEL) {
+               DBG("cancelled reset");
+               val &= ~INTR_HC_RESET_CANCEL;
+       }
+       if (val & INTR_HC_INTERNAL_ERR) {
+               dev_err(&hci->master.dev, "Host Controller Internal Error\n");
+               val &= ~INTR_HC_INTERNAL_ERR;
+       }
+       if (val & INTR_HC_PIO) {
+               hci->io->irq_handler(hci, 0);
+               val &= ~INTR_HC_PIO;
+       }
+       if (val & INTR_HC_RINGS) {
+               hci->io->irq_handler(hci, val & INTR_HC_RINGS);
+               val &= ~INTR_HC_RINGS;
+       }
+       if (val)
+               dev_err(&hci->master.dev, "unexpected INTR_STATUS %#x\n", val);
+       else
+               result = IRQ_HANDLED;
+
+       return result;
+}
+
+static int i3c_hci_init(struct i3c_hci *hci)
+{
+       u32 regval, offset;
+       int ret;
+
+       /* Validate HCI hardware version */
+       regval = reg_read(HCI_VERSION);
+       hci->version_major = (regval >> 8) & 0xf;
+       hci->version_minor = (regval >> 4) & 0xf;
+       hci->revision = regval & 0xf;
+       dev_notice(&hci->master.dev, "MIPI I3C HCI v%u.%u r%02u\n",
+                  hci->version_major, hci->version_minor, hci->revision);
+       /* known versions */
+       switch (regval & ~0xf) {
+       case 0x100:     /* version 1.0 */
+       case 0x110:     /* version 1.1 */
+       case 0x200:     /* version 2.0 */
+               break;
+       default:
+               dev_err(&hci->master.dev, "unsupported HCI version\n");
+               return -EPROTONOSUPPORT;
+       }
+
+       hci->caps = reg_read(HC_CAPABILITIES);
+       DBG("caps = %#x", hci->caps);
+
+       regval = reg_read(DAT_SECTION);
+       offset = FIELD_GET(DAT_TABLE_OFFSET, regval);
+       hci->DAT_regs = offset ? hci->base_regs + offset : NULL;
+       hci->DAT_entries = FIELD_GET(DAT_TABLE_SIZE, regval);
+       hci->DAT_entry_size = FIELD_GET(DAT_ENTRY_SIZE, regval);
+       dev_info(&hci->master.dev, "DAT: %u %u-byte entries at offset %#x\n",
+                hci->DAT_entries, hci->DAT_entry_size * 4, offset);
+
+       regval = reg_read(DCT_SECTION);
+       offset = FIELD_GET(DCT_TABLE_OFFSET, regval);
+       hci->DCT_regs = offset ? hci->base_regs + offset : NULL;
+       hci->DCT_entries = FIELD_GET(DCT_TABLE_SIZE, regval);
+       hci->DCT_entry_size = FIELD_GET(DCT_ENTRY_SIZE, regval);
+       dev_info(&hci->master.dev, "DCT: %u %u-byte entries at offset %#x\n",
+                hci->DCT_entries, hci->DCT_entry_size * 4, offset);
+
+       regval = reg_read(RING_HEADERS_SECTION);
+       offset = FIELD_GET(RING_HEADERS_OFFSET, regval);
+       hci->RHS_regs = offset ? hci->base_regs + offset : NULL;
+       dev_info(&hci->master.dev, "Ring Headers at offset %#x\n", offset);
+
+       regval = reg_read(PIO_SECTION);
+       offset = FIELD_GET(PIO_REGS_OFFSET, regval);
+       hci->PIO_regs = offset ? hci->base_regs + offset : NULL;
+       dev_info(&hci->master.dev, "PIO section at offset %#x\n", offset);
+
+       regval = reg_read(EXT_CAPS_SECTION);
+       offset = FIELD_GET(EXT_CAPS_OFFSET, regval);
+       hci->EXTCAPS_regs = offset ? hci->base_regs + offset : NULL;
+       dev_info(&hci->master.dev, "Extended Caps at offset %#x\n", offset);
+
+       ret = i3c_hci_parse_ext_caps(hci);
+       if (ret)
+               return ret;
+
+       /*
+        * Now let's reset the hardware.
+        * SOFT_RST must be clear before we write to it.
+        * Then we must wait until it clears again.
+        */
+       ret = readx_poll_timeout(reg_read, RESET_CONTROL, regval,
+                                !(regval & SOFT_RST), 1, 10000);
+       if (ret)
+               return -ENXIO;
+       reg_write(RESET_CONTROL, SOFT_RST);
+       ret = readx_poll_timeout(reg_read, RESET_CONTROL, regval,
+                                !(regval & SOFT_RST), 1, 10000);
+       if (ret)
+               return -ENXIO;
+
+       /* Disable all interrupt signals and allow all status updates */
+       reg_write(INTR_SIGNAL_ENABLE, 0x0);
+       reg_write(INTR_STATUS_ENABLE, 0xffffffff);
+
+       /* Make sure our data ordering fits the host's */
+       regval = reg_read(HC_CONTROL);
+       if (IS_ENABLED(CONFIG_BIG_ENDIAN)) {
+               if (!(regval & HC_CONTROL_DATA_BIG_ENDIAN)) {
+                       regval |= HC_CONTROL_DATA_BIG_ENDIAN;
+                       reg_write(HC_CONTROL, regval);
+                       regval = reg_read(HC_CONTROL);
+                       if (!(regval & HC_CONTROL_DATA_BIG_ENDIAN)) {
+                               dev_err(&hci->master.dev, "cannot set BE mode\n");
+                               return -EOPNOTSUPP;
+                       }
+               }
+       } else {
+               if (regval & HC_CONTROL_DATA_BIG_ENDIAN) {
+                       regval &= ~HC_CONTROL_DATA_BIG_ENDIAN;
+                       reg_write(HC_CONTROL, regval);
+                       regval = reg_read(HC_CONTROL);
+                       if (regval & HC_CONTROL_DATA_BIG_ENDIAN) {
+                               dev_err(&hci->master.dev, "cannot clear BE mode\n");
+                               return -EOPNOTSUPP;
+                       }
+               }
+       }
+
+       /* Select our command descriptor model */
+       switch (FIELD_GET(HC_CAP_CMD_SIZE, hci->caps)) {
+       case 0:
+               hci->cmd = &mipi_i3c_hci_cmd_v1;
+               break;
+       case 1:
+               hci->cmd = &mipi_i3c_hci_cmd_v2;
+               break;
+       default:
+               dev_err(&hci->master.dev, "wrong CMD_SIZE capability value\n");
+               return -EINVAL;
+       }
+
+       /* Try activating DMA operations first */
+       if (hci->RHS_regs) {
+               reg_clear(HC_CONTROL, HC_CONTROL_PIO_MODE);
+               if (reg_read(HC_CONTROL) & HC_CONTROL_PIO_MODE) {
+                       dev_err(&hci->master.dev, "PIO mode is stuck\n");
+                       ret = -EIO;
+               } else {
+                       hci->io = &mipi_i3c_hci_dma;
+                       dev_info(&hci->master.dev, "Using DMA\n");
+               }
+       }
+
+       /* If no DMA, try PIO */
+       if (!hci->io && hci->PIO_regs) {
+               reg_set(HC_CONTROL, HC_CONTROL_PIO_MODE);
+               if (!(reg_read(HC_CONTROL) & HC_CONTROL_PIO_MODE)) {
+                       dev_err(&hci->master.dev, "DMA mode is stuck\n");
+                       ret = -EIO;
+               } else {
+                       hci->io = &mipi_i3c_hci_pio;
+                       dev_info(&hci->master.dev, "Using PIO\n");
+               }
+       }
+
+       if (!hci->io) {
+               dev_err(&hci->master.dev, "neither DMA nor PIO can be used\n");
+               if (!ret)
+                       ret = -EINVAL;
+               return ret;
+       }
+
+       return 0;
+}
+
+static int i3c_hci_probe(struct platform_device *pdev)
+{
+       struct i3c_hci *hci;
+       int irq, ret;
+
+       hci = devm_kzalloc(&pdev->dev, sizeof(*hci), GFP_KERNEL);
+       if (!hci)
+               return -ENOMEM;
+       hci->base_regs = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(hci->base_regs))
+               return PTR_ERR(hci->base_regs);
+
+       platform_set_drvdata(pdev, hci);
+       /* temporary for dev_printk's, to be replaced in i3c_master_register */
+       hci->master.dev.init_name = dev_name(&pdev->dev);
+
+       ret = i3c_hci_init(hci);
+       if (ret)
+               return ret;
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0)
+               return irq;
+
+       ret = devm_request_irq(&pdev->dev, irq, i3c_hci_irq_handler,
+                              0, NULL, hci);
+       if (ret)
+               return ret;
+
+       ret = i3c_master_register(&hci->master, &pdev->dev,
+                                 &i3c_hci_ops, false);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int i3c_hci_remove(struct platform_device *pdev)
+{
+       struct i3c_hci *hci = platform_get_drvdata(pdev);
+       int ret;
+
+       ret = i3c_master_unregister(&hci->master);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static const struct of_device_id i3c_hci_of_match[] __maybe_unused = {
+       { .compatible = "mipi-i3c-hci", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, i3c_hci_of_match);
+
+static struct platform_driver i3c_hci_driver = {
+       .probe = i3c_hci_probe,
+       .remove = i3c_hci_remove,
+       .driver = {
+               .name = "mipi-i3c-hci",
+               .of_match_table = of_match_ptr(i3c_hci_of_match),
+       },
+};
+module_platform_driver(i3c_hci_driver);
+
+MODULE_AUTHOR("Nicolas Pitre <npitre@baylibre.com>");
+MODULE_DESCRIPTION("MIPI I3C HCI driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/i3c/master/mipi-i3c-hci/dat.h b/drivers/i3c/master/mipi-i3c-hci/dat.h
new file mode 100644 (file)
index 0000000..1f0f345
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * Common DAT related stuff
+ */
+
+#ifndef DAT_H
+#define DAT_H
+
+/* Global DAT flags */
+#define DAT_0_I2C_DEVICE               W0_BIT_(31)
+#define DAT_0_SIR_REJECT               W0_BIT_(13)
+#define DAT_0_IBI_PAYLOAD              W0_BIT_(12)
+
+struct hci_dat_ops {
+       int (*init)(struct i3c_hci *hci);
+       void (*cleanup)(struct i3c_hci *hci);
+       int (*alloc_entry)(struct i3c_hci *hci);
+       void (*free_entry)(struct i3c_hci *hci, unsigned int dat_idx);
+       void (*set_dynamic_addr)(struct i3c_hci *hci, unsigned int dat_idx, u8 addr);
+       void (*set_static_addr)(struct i3c_hci *hci, unsigned int dat_idx, u8 addr);
+       void (*set_flags)(struct i3c_hci *hci, unsigned int dat_idx, u32 w0, u32 w1);
+       void (*clear_flags)(struct i3c_hci *hci, unsigned int dat_idx, u32 w0, u32 w1);
+       int (*get_index)(struct i3c_hci *hci, u8 address);
+};
+
+extern const struct hci_dat_ops mipi_i3c_hci_dat_v1;
+
+#endif
diff --git a/drivers/i3c/master/mipi-i3c-hci/dat_v1.c b/drivers/i3c/master/mipi-i3c-hci/dat_v1.c
new file mode 100644 (file)
index 0000000..783e551
--- /dev/null
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/i3c/master.h>
+#include <linux/io.h>
+
+#include "hci.h"
+#include "dat.h"
+
+
+/*
+ * Device Address Table Structure
+ */
+
+#define DAT_1_AUTOCMD_HDR_CODE         W1_MASK(58, 51)
+#define DAT_1_AUTOCMD_MODE             W1_MASK(50, 48)
+#define DAT_1_AUTOCMD_VALUE            W1_MASK(47, 40)
+#define DAT_1_AUTOCMD_MASK             W1_MASK(39, 32)
+/*     DAT_0_I2C_DEVICE                W0_BIT_(31) */
+#define DAT_0_DEV_NACK_RETRY_CNT       W0_MASK(30, 29)
+#define DAT_0_RING_ID                  W0_MASK(28, 26)
+#define DAT_0_DYNADDR_PARITY           W0_BIT_(23)
+#define DAT_0_DYNAMIC_ADDRESS          W0_MASK(22, 16)
+#define DAT_0_TS                       W0_BIT_(15)
+#define DAT_0_MR_REJECT                        W0_BIT_(14)
+/*     DAT_0_SIR_REJECT                W0_BIT_(13) */
+/*     DAT_0_IBI_PAYLOAD               W0_BIT_(12) */
+#define DAT_0_STATIC_ADDRESS           W0_MASK(6, 0)
+
+#define dat_w0_read(i)         readl(hci->DAT_regs + (i) * 8)
+#define dat_w1_read(i)         readl(hci->DAT_regs + (i) * 8 + 4)
+#define dat_w0_write(i, v)     writel(v, hci->DAT_regs + (i) * 8)
+#define dat_w1_write(i, v)     writel(v, hci->DAT_regs + (i) * 8 + 4)
+
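+/*
+ * Compute the parity bit for a 7-bit dynamic address: bit 7 is forced to 1
+ * before folding the bits down so the result is the inverted parity of the
+ * address, i.e. the bit value giving odd parity over address + parity.
+ */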
+static inline bool dynaddr_parity(unsigned int addr)
+{
+       addr |= 1 << 7;
+       addr += addr >> 4;
+       addr += addr >> 2;
+       addr += addr >> 1;
+       return (addr & 1);
+}
+
+static int hci_dat_v1_init(struct i3c_hci *hci)
+{
+       unsigned int dat_idx;
+
+       if (!hci->DAT_regs) {
+               dev_err(&hci->master.dev,
+                       "only DAT in register space is supported at the moment\n");
+               return -EOPNOTSUPP;
+       }
+       if (hci->DAT_entry_size != 8) {
+               dev_err(&hci->master.dev,
+                       "only 8-byte DAT entries are supported at the moment\n");
+               return -EOPNOTSUPP;
+       }
+
+       /* use a bitmap for faster free slot search */
+       hci->DAT_data = bitmap_zalloc(hci->DAT_entries, GFP_KERNEL);
+       if (!hci->DAT_data)
+               return -ENOMEM;
+
+       /* clear them */
+       for (dat_idx = 0; dat_idx < hci->DAT_entries; dat_idx++) {
+               dat_w0_write(dat_idx, 0);
+               dat_w1_write(dat_idx, 0);
+       }
+
+       return 0;
+}
+
+static void hci_dat_v1_cleanup(struct i3c_hci *hci)
+{
+       bitmap_free(hci->DAT_data);
+       hci->DAT_data = NULL;
+}
+
+static int hci_dat_v1_alloc_entry(struct i3c_hci *hci)
+{
+       unsigned int dat_idx;
+
+       dat_idx = find_first_zero_bit(hci->DAT_data, hci->DAT_entries);
+       if (dat_idx >= hci->DAT_entries)
+               return -ENOENT;
+       __set_bit(dat_idx, hci->DAT_data);
+
+       /* default flags */
+       dat_w0_write(dat_idx, DAT_0_SIR_REJECT | DAT_0_MR_REJECT);
+
+       return dat_idx;
+}
+
+static void hci_dat_v1_free_entry(struct i3c_hci *hci, unsigned int dat_idx)
+{
+       dat_w0_write(dat_idx, 0);
+       dat_w1_write(dat_idx, 0);
+       __clear_bit(dat_idx, hci->DAT_data);
+}
+
+static void hci_dat_v1_set_dynamic_addr(struct i3c_hci *hci,
+                                       unsigned int dat_idx, u8 address)
+{
+       u32 dat_w0;
+
+       dat_w0 = dat_w0_read(dat_idx);
+       dat_w0 &= ~(DAT_0_DYNAMIC_ADDRESS | DAT_0_DYNADDR_PARITY);
+       dat_w0 |= FIELD_PREP(DAT_0_DYNAMIC_ADDRESS, address) |
+                 (dynaddr_parity(address) ? DAT_0_DYNADDR_PARITY : 0);
+       dat_w0_write(dat_idx, dat_w0);
+}
+
+static void hci_dat_v1_set_static_addr(struct i3c_hci *hci,
+                                      unsigned int dat_idx, u8 address)
+{
+       u32 dat_w0;
+
+       dat_w0 = dat_w0_read(dat_idx);
+       dat_w0 &= ~DAT_0_STATIC_ADDRESS;
+       dat_w0 |= FIELD_PREP(DAT_0_STATIC_ADDRESS, address);
+       dat_w0_write(dat_idx, dat_w0);
+}
+
+static void hci_dat_v1_set_flags(struct i3c_hci *hci, unsigned int dat_idx,
+                                u32 w0_flags, u32 w1_flags)
+{
+       u32 dat_w0, dat_w1;
+
+       dat_w0 = dat_w0_read(dat_idx);
+       dat_w1 = dat_w1_read(dat_idx);
+       dat_w0 |= w0_flags;
+       dat_w1 |= w1_flags;
+       dat_w0_write(dat_idx, dat_w0);
+       dat_w1_write(dat_idx, dat_w1);
+}
+
+static void hci_dat_v1_clear_flags(struct i3c_hci *hci, unsigned int dat_idx,
+                                  u32 w0_flags, u32 w1_flags)
+{
+       u32 dat_w0, dat_w1;
+
+       dat_w0 = dat_w0_read(dat_idx);
+       dat_w1 = dat_w1_read(dat_idx);
+       dat_w0 &= ~w0_flags;
+       dat_w1 &= ~w1_flags;
+       dat_w0_write(dat_idx, dat_w0);
+       dat_w1_write(dat_idx, dat_w1);
+}
+
+static int hci_dat_v1_get_index(struct i3c_hci *hci, u8 dev_addr)
+{
+       unsigned int dat_idx;
+       u32 dat_w0;
+
+       /* scan only the allocated DAT entries for a matching dynamic address */
+       for_each_set_bit(dat_idx, hci->DAT_data, hci->DAT_entries) {
+               dat_w0 = dat_w0_read(dat_idx);
+               if (FIELD_GET(DAT_0_DYNAMIC_ADDRESS, dat_w0) == dev_addr)
+                       return dat_idx;
+       }
+
+       return -ENODEV;
+}
+
+const struct hci_dat_ops mipi_i3c_hci_dat_v1 = {
+       .init                   = hci_dat_v1_init,
+       .cleanup                = hci_dat_v1_cleanup,
+       .alloc_entry            = hci_dat_v1_alloc_entry,
+       .free_entry             = hci_dat_v1_free_entry,
+       .set_dynamic_addr       = hci_dat_v1_set_dynamic_addr,
+       .set_static_addr        = hci_dat_v1_set_static_addr,
+       .set_flags              = hci_dat_v1_set_flags,
+       .clear_flags            = hci_dat_v1_clear_flags,
+       .get_index              = hci_dat_v1_get_index,
+};
diff --git a/drivers/i3c/master/mipi-i3c-hci/dct.h b/drivers/i3c/master/mipi-i3c-hci/dct.h
new file mode 100644 (file)
index 0000000..1028e0b
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * Common DCT related stuff
+ */
+
+#ifndef DCT_H
+#define DCT_H
+
+void i3c_hci_dct_get_val(struct i3c_hci *hci, unsigned int dct_idx,
+                        u64 *pid, unsigned int *dcr, unsigned int *bcr);
+
+#endif
diff --git a/drivers/i3c/master/mipi-i3c-hci/dct_v1.c b/drivers/i3c/master/mipi-i3c-hci/dct_v1.c
new file mode 100644 (file)
index 0000000..acfd4d6
--- /dev/null
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ */
+
+#include <linux/device.h>
+#include <linux/bitfield.h>
+#include <linux/i3c/master.h>
+#include <linux/io.h>
+
+#include "hci.h"
+#include "dct.h"
+
+/*
+ * Device Characteristic Table
+ */
+
+void i3c_hci_dct_get_val(struct i3c_hci *hci, unsigned int dct_idx,
+                        u64 *pid, unsigned int *dcr, unsigned int *bcr)
+{
+       void __iomem *reg = hci->DCT_regs + dct_idx * 4 * 4;
+       u32 dct_entry_data[4];
+       unsigned int i;
+
+       for (i = 0; i < 4; i++) {
+               dct_entry_data[i] = readl(reg);
+               reg += 4;
+       }
+
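+       /*
+        * The 48-bit PID is split across the first two DCT words: word 0
+        * holds PID[47:16] and the low half of word 1 holds PID[15:0].
+        */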
+       *pid = ((u64)dct_entry_data[0]) << (47 - 32 + 1) |
+              FIELD_GET(W1_MASK(47, 32), dct_entry_data[1]);
+       *dcr = FIELD_GET(W2_MASK(71, 64), dct_entry_data[2]);
+       *bcr = FIELD_GET(W2_MASK(79, 72), dct_entry_data[2]);
+}
diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c
new file mode 100644 (file)
index 0000000..af873a9
--- /dev/null
@@ -0,0 +1,784 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * Note: The I3C HCI v2.0 spec is still in flux. The IBI support is based on
+ * v1.x of the spec and v2.0 will likely be split out.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/i3c/master.h>
+#include <linux/io.h>
+
+#include "hci.h"
+#include "cmd.h"
+#include "ibi.h"
+
+
+/*
+ * Software Parameter Values (somewhat arbitrary for now).
+ * Some of them could be determined at run time eventually.
+ */
+
+#define XFER_RINGS                     1       /* max: 8 */
+#define XFER_RING_ENTRIES              16      /* max: 255 */
+
+#define IBI_RINGS                      1       /* max: 8 */
+#define IBI_STATUS_RING_ENTRIES                32      /* max: 255 */
+#define IBI_CHUNK_CACHELINES           1       /* max: 256 bytes equivalent */
+#define IBI_CHUNK_POOL_SIZE            128     /* max: 1023 */
+
+/*
+ * Ring Header Preamble
+ */
+
+#define rhs_reg_read(r)                readl(hci->RHS_regs + (RHS_##r))
+#define rhs_reg_write(r, v)    writel(v, hci->RHS_regs + (RHS_##r))
+
+#define RHS_CONTROL                    0x00
+#define PREAMBLE_SIZE                  GENMASK(31, 24) /* Preamble Section Size */
+#define HEADER_SIZE                    GENMASK(23, 16) /* Ring Header Size */
+#define MAX_HEADER_COUNT_CAP           GENMASK(7, 4) /* HC Max Header Count */
+#define MAX_HEADER_COUNT               GENMASK(3, 0) /* Driver Max Header Count */
+
+#define RHS_RHn_OFFSET(n)              (0x04 + (n)*4)
+
+/*
+ * Ring Header (Per-Ring Bundle)
+ */
+
+#define rh_reg_read(r)         readl(rh->regs + (RH_##r))
+#define rh_reg_write(r, v)     writel(v, rh->regs + (RH_##r))
+
+#define RH_CR_SETUP                    0x00    /* Command/Response Ring */
+#define CR_XFER_STRUCT_SIZE            GENMASK(31, 24)
+#define CR_RESP_STRUCT_SIZE            GENMASK(23, 16)
+#define CR_RING_SIZE                   GENMASK(8, 0)
+
+#define RH_IBI_SETUP                   0x04
+#define IBI_STATUS_STRUCT_SIZE         GENMASK(31, 24)
+#define IBI_STATUS_RING_SIZE           GENMASK(23, 16)
+#define IBI_DATA_CHUNK_SIZE            GENMASK(12, 10)
+#define IBI_DATA_CHUNK_COUNT           GENMASK(9, 0)
+
+#define RH_CHUNK_CONTROL                       0x08
+
+#define RH_INTR_STATUS                 0x10
+#define RH_INTR_STATUS_ENABLE          0x14
+#define RH_INTR_SIGNAL_ENABLE          0x18
+#define RH_INTR_FORCE                  0x1c
+#define INTR_IBI_READY                 BIT(12)
+#define INTR_TRANSFER_COMPLETION       BIT(11)
+#define INTR_RING_OP                   BIT(10)
+#define INTR_TRANSFER_ERR              BIT(9)
+#define INTR_WARN_INS_STOP_MODE                BIT(7)
+#define INTR_IBI_RING_FULL             BIT(6)
+#define INTR_TRANSFER_ABORT            BIT(5)
+
+#define RH_RING_STATUS                 0x20
+#define RING_STATUS_LOCKED             BIT(3)
+#define RING_STATUS_ABORTED            BIT(2)
+#define RING_STATUS_RUNNING            BIT(1)
+#define RING_STATUS_ENABLED            BIT(0)
+
+#define RH_RING_CONTROL                        0x24
+#define RING_CTRL_ABORT                        BIT(2)
+#define RING_CTRL_RUN_STOP             BIT(1)
+#define RING_CTRL_ENABLE               BIT(0)
+
+#define RH_RING_OPERATION1             0x28
+#define RING_OP1_IBI_DEQ_PTR           GENMASK(23, 16)
+#define RING_OP1_CR_SW_DEQ_PTR         GENMASK(15, 8)
+#define RING_OP1_CR_ENQ_PTR            GENMASK(7, 0)
+
+#define RH_RING_OPERATION2             0x2c
+#define RING_OP2_IBI_ENQ_PTR           GENMASK(23, 16)
+#define RING_OP2_CR_DEQ_PTR            GENMASK(7, 0)
+
+#define RH_CMD_RING_BASE_LO            0x30
+#define RH_CMD_RING_BASE_HI            0x34
+#define RH_RESP_RING_BASE_LO           0x38
+#define RH_RESP_RING_BASE_HI           0x3c
+#define RH_IBI_STATUS_RING_BASE_LO     0x40
+#define RH_IBI_STATUS_RING_BASE_HI     0x44
+#define RH_IBI_DATA_RING_BASE_LO       0x48
+#define RH_IBI_DATA_RING_BASE_HI       0x4c
+
+#define RH_CMD_RING_SG                 0x50    /* Ring Scatter Gather Support */
+#define RH_RESP_RING_SG                        0x54
+#define RH_IBI_STATUS_RING_SG          0x58
+#define RH_IBI_DATA_RING_SG            0x5c
+#define RING_SG_BLP                    BIT(31) /* Buffer Vs. List Pointer */
+#define RING_SG_LIST_SIZE              GENMASK(15, 0)
+
+/*
+ * Data Buffer Descriptor (in memory)
+ */
+
+#define DATA_BUF_BLP                   BIT(31) /* Buffer Vs. List Pointer */
+#define DATA_BUF_IOC                   BIT(30) /* Interrupt on Completion */
+#define DATA_BUF_BLOCK_SIZE            GENMASK(15, 0)
+
+
+struct hci_rh_data {
+       void __iomem *regs;
+       void *xfer, *resp, *ibi_status, *ibi_data;
+       dma_addr_t xfer_dma, resp_dma, ibi_status_dma, ibi_data_dma;
+       unsigned int xfer_entries, ibi_status_entries, ibi_chunks_total;
+       unsigned int xfer_struct_sz, resp_struct_sz, ibi_status_sz, ibi_chunk_sz;
+       unsigned int done_ptr, ibi_chunk_ptr;
+       struct hci_xfer **src_xfers;
+       spinlock_t lock;
+       struct completion op_done;
+};
+
+struct hci_rings_data {
+       unsigned int total;
+       struct hci_rh_data headers[];
+};
+
+struct hci_dma_dev_ibi_data {
+       struct i3c_generic_ibi_pool *pool;
+       unsigned int max_len;
+};
+
+static inline u32 lo32(dma_addr_t physaddr)
+{
+       return physaddr;
+}
+
+static inline u32 hi32(dma_addr_t physaddr)
+{
+       /* trickery to avoid compiler warnings on 32-bit build targets */
+       if (sizeof(dma_addr_t) > 4) {
+               u64 hi = physaddr;
+               return hi >> 32;
+       }
+       return 0;
+}
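+
+/*
+ * Illustrative example (value assumed): for a 64-bit dma_addr_t of
+ * 0x123456789, lo32() yields 0x23456789 and hi32() yields 0x1. With a
+ * 32-bit dma_addr_t the sizeof() test is false at compile time and hi32()
+ * collapses to a constant 0 without any out-of-range shift warning.
+ */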
+
+static void hci_dma_cleanup(struct i3c_hci *hci)
+{
+       struct hci_rings_data *rings = hci->io_data;
+       struct hci_rh_data *rh;
+       unsigned int i;
+
+       if (!rings)
+               return;
+
+       for (i = 0; i < rings->total; i++) {
+               rh = &rings->headers[i];
+
+               rh_reg_write(RING_CONTROL, 0);
+               rh_reg_write(CR_SETUP, 0);
+               rh_reg_write(IBI_SETUP, 0);
+               rh_reg_write(INTR_SIGNAL_ENABLE, 0);
+
+               if (rh->xfer)
+                       dma_free_coherent(&hci->master.dev,
+                                         rh->xfer_struct_sz * rh->xfer_entries,
+                                         rh->xfer, rh->xfer_dma);
+               if (rh->resp)
+                       dma_free_coherent(&hci->master.dev,
+                                         rh->resp_struct_sz * rh->xfer_entries,
+                                         rh->resp, rh->resp_dma);
+               kfree(rh->src_xfers);
+               if (rh->ibi_status)
+                       dma_free_coherent(&hci->master.dev,
+                                         rh->ibi_status_sz * rh->ibi_status_entries,
+                                         rh->ibi_status, rh->ibi_status_dma);
+               if (rh->ibi_data_dma)
+                       dma_unmap_single(&hci->master.dev, rh->ibi_data_dma,
+                                        rh->ibi_chunk_sz * rh->ibi_chunks_total,
+                                        DMA_FROM_DEVICE);
+               kfree(rh->ibi_data);
+       }
+
+       rhs_reg_write(CONTROL, 0);
+
+       kfree(rings);
+       hci->io_data = NULL;
+}
+
+static int hci_dma_init(struct i3c_hci *hci)
+{
+       struct hci_rings_data *rings;
+       struct hci_rh_data *rh;
+       u32 regval;
+       unsigned int i, nr_rings, xfers_sz, resps_sz;
+       unsigned int ibi_status_ring_sz, ibi_data_ring_sz;
+       int ret;
+
+       regval = rhs_reg_read(CONTROL);
+       nr_rings = FIELD_GET(MAX_HEADER_COUNT_CAP, regval);
+       dev_info(&hci->master.dev, "%d DMA rings available\n", nr_rings);
+       if (unlikely(nr_rings > 8)) {
+               dev_err(&hci->master.dev, "number of rings should be <= 8\n");
+               nr_rings = 8;
+       }
+       if (nr_rings > XFER_RINGS)
+               nr_rings = XFER_RINGS;
+       rings = kzalloc(sizeof(*rings) + nr_rings * sizeof(*rh), GFP_KERNEL);
+       if (!rings)
+               return -ENOMEM;
+       hci->io_data = rings;
+       rings->total = nr_rings;
+
+       for (i = 0; i < rings->total; i++) {
+               u32 offset = rhs_reg_read(RHn_OFFSET(i));
+
+               dev_info(&hci->master.dev, "Ring %d at offset %#x\n", i, offset);
+               ret = -EINVAL;
+               if (!offset)
+                       goto err_out;
+               rh = &rings->headers[i];
+               rh->regs = hci->base_regs + offset;
+               spin_lock_init(&rh->lock);
+               init_completion(&rh->op_done);
+
+               rh->xfer_entries = XFER_RING_ENTRIES;
+
+               regval = rh_reg_read(CR_SETUP);
+               rh->xfer_struct_sz = FIELD_GET(CR_XFER_STRUCT_SIZE, regval);
+               rh->resp_struct_sz = FIELD_GET(CR_RESP_STRUCT_SIZE, regval);
+               DBG("xfer_struct_sz = %d, resp_struct_sz = %d",
+                   rh->xfer_struct_sz, rh->resp_struct_sz);
+               xfers_sz = rh->xfer_struct_sz * rh->xfer_entries;
+               resps_sz = rh->resp_struct_sz * rh->xfer_entries;
+
+               rh->xfer = dma_alloc_coherent(&hci->master.dev, xfers_sz,
+                                             &rh->xfer_dma, GFP_KERNEL);
+               rh->resp = dma_alloc_coherent(&hci->master.dev, resps_sz,
+                                             &rh->resp_dma, GFP_KERNEL);
+               rh->src_xfers =
+                       kmalloc_array(rh->xfer_entries, sizeof(*rh->src_xfers),
+                                     GFP_KERNEL);
+               ret = -ENOMEM;
+               if (!rh->xfer || !rh->resp || !rh->src_xfers)
+                       goto err_out;
+
+               rh_reg_write(CMD_RING_BASE_LO, lo32(rh->xfer_dma));
+               rh_reg_write(CMD_RING_BASE_HI, hi32(rh->xfer_dma));
+               rh_reg_write(RESP_RING_BASE_LO, lo32(rh->resp_dma));
+               rh_reg_write(RESP_RING_BASE_HI, hi32(rh->resp_dma));
+
+               regval = FIELD_PREP(CR_RING_SIZE, rh->xfer_entries);
+               rh_reg_write(CR_SETUP, regval);
+
+               rh_reg_write(INTR_STATUS_ENABLE, 0xffffffff);
+               rh_reg_write(INTR_SIGNAL_ENABLE, INTR_IBI_READY |
+                                                INTR_TRANSFER_COMPLETION |
+                                                INTR_RING_OP |
+                                                INTR_TRANSFER_ERR |
+                                                INTR_WARN_INS_STOP_MODE |
+                                                INTR_IBI_RING_FULL |
+                                                INTR_TRANSFER_ABORT);
+
+               /* IBIs */
+
+               if (i >= IBI_RINGS)
+                       goto ring_ready;
+
+               regval = rh_reg_read(IBI_SETUP);
+               rh->ibi_status_sz = FIELD_GET(IBI_STATUS_STRUCT_SIZE, regval);
+               rh->ibi_status_entries = IBI_STATUS_RING_ENTRIES;
+               rh->ibi_chunks_total = IBI_CHUNK_POOL_SIZE;
+
+               rh->ibi_chunk_sz = dma_get_cache_alignment();
+               rh->ibi_chunk_sz *= IBI_CHUNK_CACHELINES;
+               BUG_ON(rh->ibi_chunk_sz > 256);
+
+               ibi_status_ring_sz = rh->ibi_status_sz * rh->ibi_status_entries;
+               ibi_data_ring_sz = rh->ibi_chunk_sz * rh->ibi_chunks_total;
+
+               rh->ibi_status =
+                       dma_alloc_coherent(&hci->master.dev, ibi_status_ring_sz,
+                                          &rh->ibi_status_dma, GFP_KERNEL);
+               rh->ibi_data = kmalloc(ibi_data_ring_sz, GFP_KERNEL);
+               ret = -ENOMEM;
+               if (!rh->ibi_status || !rh->ibi_data)
+                       goto err_out;
+               rh->ibi_data_dma =
+                       dma_map_single(&hci->master.dev, rh->ibi_data,
+                                      ibi_data_ring_sz, DMA_FROM_DEVICE);
+               if (dma_mapping_error(&hci->master.dev, rh->ibi_data_dma)) {
+                       rh->ibi_data_dma = 0;
+                       ret = -ENOMEM;
+                       goto err_out;
+               }
+
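+               /*
+                * Worked example (values assumed, not mandated by the spec):
+                * with 64-byte cache alignment and IBI_CHUNK_CACHELINES == 1,
+                * ibi_chunk_sz is 64, so IBI_DATA_CHUNK_SIZE is programmed
+                * with ilog2(64) - 2 == 4, i.e. the chunk size expressed as
+                * a power of two in 32-bit words (16 == 2^4).
+                */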
+               regval = FIELD_PREP(IBI_STATUS_RING_SIZE,
+                                   rh->ibi_status_entries) |
+                        FIELD_PREP(IBI_DATA_CHUNK_SIZE,
+                                   ilog2(rh->ibi_chunk_sz) - 2) |
+                        FIELD_PREP(IBI_DATA_CHUNK_COUNT,
+                                   rh->ibi_chunks_total);
+               rh_reg_write(IBI_SETUP, regval);
+
+               regval = rh_reg_read(INTR_SIGNAL_ENABLE);
+               regval |= INTR_IBI_READY;
+               rh_reg_write(INTR_SIGNAL_ENABLE, regval);
+
+ring_ready:
+               rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE);
+       }
+
+       regval = FIELD_PREP(MAX_HEADER_COUNT, rings->total);
+       rhs_reg_write(CONTROL, regval);
+       return 0;
+
+err_out:
+       hci_dma_cleanup(hci);
+       return ret;
+}
+
+static void hci_dma_unmap_xfer(struct i3c_hci *hci,
+                              struct hci_xfer *xfer_list, unsigned int n)
+{
+       struct hci_xfer *xfer;
+       unsigned int i;
+
+       for (i = 0; i < n; i++) {
+               xfer = xfer_list + i;
+               dma_unmap_single(&hci->master.dev,
+                                xfer->data_dma, xfer->data_len,
+                                xfer->rnw ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+       }
+}
+
+static int hci_dma_queue_xfer(struct i3c_hci *hci,
+                             struct hci_xfer *xfer_list, int n)
+{
+       struct hci_rings_data *rings = hci->io_data;
+       struct hci_rh_data *rh;
+       unsigned int i, ring, enqueue_ptr;
+       u32 op1_val, op2_val;
+
+       /* For now we only use ring 0 */
+       ring = 0;
+       rh = &rings->headers[ring];
+
+       op1_val = rh_reg_read(RING_OPERATION1);
+       enqueue_ptr = FIELD_GET(RING_OP1_CR_ENQ_PTR, op1_val);
+       for (i = 0; i < n; i++) {
+               struct hci_xfer *xfer = xfer_list + i;
+               u32 *ring_data = rh->xfer + rh->xfer_struct_sz * enqueue_ptr;
+
+               /* store cmd descriptor */
+               *ring_data++ = xfer->cmd_desc[0];
+               *ring_data++ = xfer->cmd_desc[1];
+               if (hci->cmd == &mipi_i3c_hci_cmd_v2) {
+                       *ring_data++ = xfer->cmd_desc[2];
+                       *ring_data++ = xfer->cmd_desc[3];
+               }
+
+               /* first word of Data Buffer Descriptor Structure */
+               if (!xfer->data)
+                       xfer->data_len = 0;
+               *ring_data++ =
+                       FIELD_PREP(DATA_BUF_BLOCK_SIZE, xfer->data_len) |
+                       ((i == n - 1) ? DATA_BUF_IOC : 0);
+
+               /* 2nd and 3rd words of Data Buffer Descriptor Structure */
+               if (xfer->data) {
+                       xfer->data_dma =
+                               dma_map_single(&hci->master.dev,
+                                              xfer->data,
+                                              xfer->data_len,
+                                              xfer->rnw ?
+                                                 DMA_FROM_DEVICE :
+                                                 DMA_TO_DEVICE);
+                       if (dma_mapping_error(&hci->master.dev,
+                                             xfer->data_dma)) {
+                               hci_dma_unmap_xfer(hci, xfer_list, i);
+                               return -ENOMEM;
+                       }
+                       *ring_data++ = lo32(xfer->data_dma);
+                       *ring_data++ = hi32(xfer->data_dma);
+               } else {
+                       *ring_data++ = 0;
+                       *ring_data++ = 0;
+               }
+
+               /* remember corresponding xfer struct */
+               rh->src_xfers[enqueue_ptr] = xfer;
+               /* remember corresponding ring/entry for this xfer structure */
+               xfer->ring_number = ring;
+               xfer->ring_entry = enqueue_ptr;
+
+               enqueue_ptr = (enqueue_ptr + 1) % rh->xfer_entries;
+
+               /*
+                * We may update the hardware view of the enqueue pointer
+                * only if we didn't reach its dequeue pointer.
+                */
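+               /*
+                * Illustrative consequence (sketch, not from the spec): one
+                * slot is sacrificed to tell a full ring from an empty one,
+                * so with xfer_entries == N at most N - 1 transfers may be
+                * outstanding at any time.
+                */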
+               op2_val = rh_reg_read(RING_OPERATION2);
+               if (enqueue_ptr == FIELD_GET(RING_OP2_CR_DEQ_PTR, op2_val)) {
+                       /* the ring is full */
+                       hci_dma_unmap_xfer(hci, xfer_list, i + 1);
+                       return -EBUSY;
+               }
+       }
+
+       /* take care to update the hardware enqueue pointer atomically */
+       spin_lock_irq(&rh->lock);
+       op1_val = rh_reg_read(RING_OPERATION1);
+       op1_val &= ~RING_OP1_CR_ENQ_PTR;
+       op1_val |= FIELD_PREP(RING_OP1_CR_ENQ_PTR, enqueue_ptr);
+       rh_reg_write(RING_OPERATION1, op1_val);
+       spin_unlock_irq(&rh->lock);
+
+       return 0;
+}
+
+static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
+                                struct hci_xfer *xfer_list, int n)
+{
+       struct hci_rings_data *rings = hci->io_data;
+       struct hci_rh_data *rh = &rings->headers[xfer_list[0].ring_number];
+       unsigned int i;
+       bool did_unqueue = false;
+
+       /* stop the ring */
+       rh_reg_write(RING_CONTROL, RING_CTRL_ABORT);
+       if (wait_for_completion_timeout(&rh->op_done, HZ) == 0) {
+               /*
+                * We're deep in it if this condition is ever met.
+                * Hardware might still be writing to memory, etc.
+                * Better to suspend the world than risk silent corruption.
+                */
+               dev_crit(&hci->master.dev, "unable to abort the ring\n");
+               BUG();
+       }
+
+       for (i = 0; i < n; i++) {
+               struct hci_xfer *xfer = xfer_list + i;
+               int idx = xfer->ring_entry;
+
+               /*
+                * At the time the abort happened, the xfer might have
+                * completed already. If not then replace corresponding
+                * descriptor entries with a no-op.
+                */
+               if (idx >= 0) {
+                       u32 *ring_data = rh->xfer + rh->xfer_struct_sz * idx;
+
+                       /* store no-op cmd descriptor */
+                       *ring_data++ = FIELD_PREP(CMD_0_ATTR, 0x7);
+                       *ring_data++ = 0;
+                       if (hci->cmd == &mipi_i3c_hci_cmd_v2) {
+                               *ring_data++ = 0;
+                               *ring_data++ = 0;
+                       }
+
+                       /* disassociate this xfer struct */
+                       rh->src_xfers[idx] = NULL;
+
+                       /* and unmap it */
+                       hci_dma_unmap_xfer(hci, xfer, 1);
+
+                       did_unqueue = true;
+               }
+       }
+
+       /* restart the ring */
+       rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE);
+
+       return did_unqueue;
+}
+
+static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh)
+{
+       u32 op1_val, op2_val, resp, *ring_resp;
+       unsigned int tid, done_ptr = rh->done_ptr;
+       struct hci_xfer *xfer;
+
+       for (;;) {
+               op2_val = rh_reg_read(RING_OPERATION2);
+               if (done_ptr == FIELD_GET(RING_OP2_CR_DEQ_PTR, op2_val))
+                       break;
+
+               ring_resp = rh->resp + rh->resp_struct_sz * done_ptr;
+               resp = *ring_resp;
+               tid = RESP_TID(resp);
+               DBG("resp = 0x%08x", resp);
+
+               xfer = rh->src_xfers[done_ptr];
+               if (!xfer) {
+                       DBG("orphaned ring entry");
+               } else {
+                       hci_dma_unmap_xfer(hci, xfer, 1);
+                       xfer->ring_entry = -1;
+                       xfer->response = resp;
+                       if (tid != xfer->cmd_tid) {
+                               dev_err(&hci->master.dev,
+                                       "response tid=%d when expecting %d\n",
+                                       tid, xfer->cmd_tid);
+                               /* TODO: do something about it? */
+                       }
+                       if (xfer->completion)
+                               complete(xfer->completion);
+               }
+
+               done_ptr = (done_ptr + 1) % rh->xfer_entries;
+               rh->done_ptr = done_ptr;
+       }
+
+       /* take care to update the software dequeue pointer atomically */
+       spin_lock(&rh->lock);
+       op1_val = rh_reg_read(RING_OPERATION1);
+       op1_val &= ~RING_OP1_CR_SW_DEQ_PTR;
+       op1_val |= FIELD_PREP(RING_OP1_CR_SW_DEQ_PTR, done_ptr);
+       rh_reg_write(RING_OPERATION1, op1_val);
+       spin_unlock(&rh->lock);
+}
+
+static int hci_dma_request_ibi(struct i3c_hci *hci, struct i3c_dev_desc *dev,
+                              const struct i3c_ibi_setup *req)
+{
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+       struct i3c_generic_ibi_pool *pool;
+       struct hci_dma_dev_ibi_data *dev_ibi;
+
+       dev_ibi = kmalloc(sizeof(*dev_ibi), GFP_KERNEL);
+       if (!dev_ibi)
+               return -ENOMEM;
+       pool = i3c_generic_ibi_alloc_pool(dev, req);
+       if (IS_ERR(pool)) {
+               kfree(dev_ibi);
+               return PTR_ERR(pool);
+       }
+       dev_ibi->pool = pool;
+       dev_ibi->max_len = req->max_payload_len;
+       dev_data->ibi_data = dev_ibi;
+       return 0;
+}
+
+static void hci_dma_free_ibi(struct i3c_hci *hci, struct i3c_dev_desc *dev)
+{
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+       struct hci_dma_dev_ibi_data *dev_ibi = dev_data->ibi_data;
+
+       dev_data->ibi_data = NULL;
+       i3c_generic_ibi_free_pool(dev_ibi->pool);
+       kfree(dev_ibi);
+}
+
+static void hci_dma_recycle_ibi_slot(struct i3c_hci *hci,
+                                    struct i3c_dev_desc *dev,
+                                    struct i3c_ibi_slot *slot)
+{
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+       struct hci_dma_dev_ibi_data *dev_ibi = dev_data->ibi_data;
+
+       i3c_generic_ibi_recycle_slot(dev_ibi->pool, slot);
+}
+
+static void hci_dma_process_ibi(struct i3c_hci *hci, struct hci_rh_data *rh)
+{
+       struct i3c_dev_desc *dev;
+       struct i3c_hci_dev_data *dev_data;
+       struct hci_dma_dev_ibi_data *dev_ibi;
+       struct i3c_ibi_slot *slot;
+       u32 op1_val, op2_val, ibi_status_error;
+       unsigned int ptr, enq_ptr, deq_ptr;
+       unsigned int ibi_size, ibi_chunks, ibi_data_offset, first_part;
+       int ibi_addr, last_ptr;
+       void *ring_ibi_data;
+       dma_addr_t ring_ibi_data_dma;
+
+       op1_val = rh_reg_read(RING_OPERATION1);
+       deq_ptr = FIELD_GET(RING_OP1_IBI_DEQ_PTR, op1_val);
+
+       op2_val = rh_reg_read(RING_OPERATION2);
+       enq_ptr = FIELD_GET(RING_OP2_IBI_ENQ_PTR, op2_val);
+
+       ibi_status_error = 0;
+       ibi_addr = -1;
+       ibi_chunks = 0;
+       ibi_size = 0;
+       last_ptr = -1;
+
+       /* let's find out all we can about this IBI */
+       for (ptr = deq_ptr; ptr != enq_ptr;
+            ptr = (ptr + 1) % rh->ibi_status_entries) {
+               u32 ibi_status, *ring_ibi_status;
+               unsigned int chunks;
+
+               ring_ibi_status = rh->ibi_status + rh->ibi_status_sz * ptr;
+               ibi_status = *ring_ibi_status;
+               DBG("status = %#x", ibi_status);
+
+               if (ibi_status_error) {
+                       /* we no longer care */
+               } else if (ibi_status & IBI_ERROR) {
+                       ibi_status_error = ibi_status;
+               } else if (ibi_addr == -1) {
+                       ibi_addr = FIELD_GET(IBI_TARGET_ADDR, ibi_status);
+               } else if (ibi_addr != FIELD_GET(IBI_TARGET_ADDR, ibi_status)) {
+                       /* the address changed unexpectedly */
+                       ibi_status_error = ibi_status;
+               }
+
+               chunks = FIELD_GET(IBI_CHUNKS, ibi_status);
+               ibi_chunks += chunks;
+               if (!(ibi_status & IBI_LAST_STATUS)) {
+                       ibi_size += chunks * rh->ibi_chunk_sz;
+               } else {
+                       ibi_size += FIELD_GET(IBI_DATA_LENGTH, ibi_status);
+                       last_ptr = ptr;
+                       break;
+               }
+       }
+
+       /* validate what we've got */
+
+       if (last_ptr == -1) {
+               /* this IBI sequence is not yet complete */
+               DBG("no LAST_STATUS available (e=%d d=%d)", enq_ptr, deq_ptr);
+               return;
+       }
+       deq_ptr = last_ptr + 1;
+       deq_ptr %= rh->ibi_status_entries;
+
+       if (ibi_status_error) {
+               dev_err(&hci->master.dev, "IBI error from %#x\n", ibi_addr);
+               goto done;
+       }
+
+       /* determine who this is for */
+       dev = i3c_hci_addr_to_dev(hci, ibi_addr);
+       if (!dev) {
+               dev_err(&hci->master.dev,
+                       "IBI for unknown device %#x\n", ibi_addr);
+               goto done;
+       }
+
+       dev_data = i3c_dev_get_master_data(dev);
+       dev_ibi = dev_data->ibi_data;
+       if (ibi_size > dev_ibi->max_len) {
+               dev_err(&hci->master.dev, "IBI payload too big (%d > %d)\n",
+                       ibi_size, dev_ibi->max_len);
+               goto done;
+       }
+
+       /*
+        * This ring model is not suitable for zero-copy processing of IBIs.
+        * We have the data chunk ring wrap-around to deal with, meaning
+        * that the payload might span multiple chunks, beginning at the
+        * end of the ring and wrapping to its start. Furthermore
+        * there is no guarantee that those chunks will be released in order
+        * and in a timely manner by the upper driver. So let's just copy
+        * them to a discrete buffer. In practice they're supposed to be
+        * small anyway.
+        */
+       slot = i3c_generic_ibi_get_free_slot(dev_ibi->pool);
+       if (!slot) {
+               dev_err(&hci->master.dev, "no free slot for IBI\n");
+               goto done;
+       }
+
+       /* copy first part of the payload */
+       ibi_data_offset = rh->ibi_chunk_sz * rh->ibi_chunk_ptr;
+       ring_ibi_data = rh->ibi_data + ibi_data_offset;
+       ring_ibi_data_dma = rh->ibi_data_dma + ibi_data_offset;
+       first_part = (rh->ibi_chunks_total - rh->ibi_chunk_ptr)
+                       * rh->ibi_chunk_sz;
+       if (first_part > ibi_size)
+               first_part = ibi_size;
+       dma_sync_single_for_cpu(&hci->master.dev, ring_ibi_data_dma,
+                               first_part, DMA_FROM_DEVICE);
+       memcpy(slot->data, ring_ibi_data, first_part);
+
+       /* copy second part if any */
+       if (ibi_size > first_part) {
+               /* we wrap back to the start and copy remaining data */
+               ring_ibi_data = rh->ibi_data;
+               ring_ibi_data_dma = rh->ibi_data_dma;
+               dma_sync_single_for_cpu(&hci->master.dev, ring_ibi_data_dma,
+                                       ibi_size - first_part, DMA_FROM_DEVICE);
+               memcpy(slot->data + first_part, ring_ibi_data,
+                      ibi_size - first_part);
+       }
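+
+       /*
+        * Worked example (numbers assumed): with ibi_chunk_sz == 64,
+        * ibi_chunks_total == 128 and ibi_chunk_ptr == 126, a 200-byte
+        * payload gives first_part == 2 * 64 == 128 bytes copied from the
+        * tail of the ring, then the remaining 72 bytes from its start.
+        */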
+
+       /* submit it */
+       slot->dev = dev;
+       slot->len = ibi_size;
+       i3c_master_queue_ibi(dev, slot);
+
+done:
+       /* take care to update the ibi dequeue pointer atomically */
+       spin_lock(&rh->lock);
+       op1_val = rh_reg_read(RING_OPERATION1);
+       op1_val &= ~RING_OP1_IBI_DEQ_PTR;
+       op1_val |= FIELD_PREP(RING_OP1_IBI_DEQ_PTR, deq_ptr);
+       rh_reg_write(RING_OPERATION1, op1_val);
+       spin_unlock(&rh->lock);
+
+       /* update the chunk pointer */
+       rh->ibi_chunk_ptr += ibi_chunks;
+       rh->ibi_chunk_ptr %= rh->ibi_chunks_total;
+
+       /* and tell the hardware about freed chunks */
+       rh_reg_write(CHUNK_CONTROL, rh_reg_read(CHUNK_CONTROL) + ibi_chunks);
+}
+
+static bool hci_dma_irq_handler(struct i3c_hci *hci, unsigned int mask)
+{
+       struct hci_rings_data *rings = hci->io_data;
+       unsigned int i;
+       bool handled = false;
+
+       for (i = 0; mask && i < 8; i++) {
+               struct hci_rh_data *rh;
+               u32 status;
+
+               if (!(mask & BIT(i)))
+                       continue;
+               mask &= ~BIT(i);
+
+               rh = &rings->headers[i];
+               status = rh_reg_read(INTR_STATUS);
+               DBG("rh%d status: %#x", i, status);
+               if (!status)
+                       continue;
+               rh_reg_write(INTR_STATUS, status);
+
+               if (status & INTR_IBI_READY)
+                       hci_dma_process_ibi(hci, rh);
+               if (status & (INTR_TRANSFER_COMPLETION | INTR_TRANSFER_ERR))
+                       hci_dma_xfer_done(hci, rh);
+               if (status & INTR_RING_OP)
+                       complete(&rh->op_done);
+
+               if (status & INTR_TRANSFER_ABORT)
+                       dev_notice_ratelimited(&hci->master.dev,
+                               "ring %d: Transfer Aborted\n", i);
+               if (status & INTR_WARN_INS_STOP_MODE)
+                       dev_warn_ratelimited(&hci->master.dev,
+                               "ring %d: Inserted Stop on Mode Change\n", i);
+               if (status & INTR_IBI_RING_FULL)
+                       dev_err_ratelimited(&hci->master.dev,
+                               "ring %d: IBI Ring Full Condition\n", i);
+
+               handled = true;
+       }
+
+       return handled;
+}
+
+const struct hci_io_ops mipi_i3c_hci_dma = {
+       .init                   = hci_dma_init,
+       .cleanup                = hci_dma_cleanup,
+       .queue_xfer             = hci_dma_queue_xfer,
+       .dequeue_xfer           = hci_dma_dequeue_xfer,
+       .irq_handler            = hci_dma_irq_handler,
+       .request_ibi            = hci_dma_request_ibi,
+       .free_ibi               = hci_dma_free_ibi,
+       .recycle_ibi_slot       = hci_dma_recycle_ibi_slot,
+};
diff --git a/drivers/i3c/master/mipi-i3c-hci/ext_caps.c b/drivers/i3c/master/mipi-i3c-hci/ext_caps.c
new file mode 100644 (file)
index 0000000..2e9b23e
--- /dev/null
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/i3c/master.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+
+#include "hci.h"
+#include "ext_caps.h"
+#include "xfer_mode_rate.h"
+
+
+/* Extended Capability Header */
+#define CAP_HEADER_LENGTH              GENMASK(23, 8)
+#define CAP_HEADER_ID                  GENMASK(7, 0)
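+
+/*
+ * Example decode (header value assumed): a cap_header of 0x00000408 gives
+ * CAP_HEADER_ID == 0x08 and CAP_HEADER_LENGTH == 4, i.e. a capability
+ * structure spanning 4 DWORDs, so the next capability starts 16 bytes
+ * further on.
+ */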
+
+static int hci_extcap_hardware_id(struct i3c_hci *hci, void __iomem *base)
+{
+       hci->vendor_mipi_id     = readl(base + 0x04);
+       hci->vendor_version_id  = readl(base + 0x08);
+       hci->vendor_product_id  = readl(base + 0x0c);
+
+       dev_info(&hci->master.dev, "vendor MIPI ID: %#x\n", hci->vendor_mipi_id);
+       dev_info(&hci->master.dev, "vendor version ID: %#x\n", hci->vendor_version_id);
+       dev_info(&hci->master.dev, "vendor product ID: %#x\n", hci->vendor_product_id);
+
+       /* ought to go in a table if this grows too much */
+       switch (hci->vendor_mipi_id) {
+       case MIPI_VENDOR_NXP:
+               hci->quirks |= HCI_QUIRK_RAW_CCC;
+               DBG("raw CCC quirks set");
+               break;
+       }
+
+       return 0;
+}
+
+static int hci_extcap_master_config(struct i3c_hci *hci, void __iomem *base)
+{
+       u32 master_config = readl(base + 0x04);
+       unsigned int operation_mode = FIELD_GET(GENMASK(5, 4), master_config);
+       static const char * const functionality[] = {
+               "(unknown)", "master only", "target only",
+               "primary/secondary master" };
+       dev_info(&hci->master.dev, "operation mode: %s\n", functionality[operation_mode]);
+       if (operation_mode & 0x1)
+               return 0;
+       dev_err(&hci->master.dev, "only master mode is currently supported\n");
+       return -EOPNOTSUPP;
+}
+
+static int hci_extcap_multi_bus(struct i3c_hci *hci, void __iomem *base)
+{
+       u32 bus_instance = readl(base + 0x04);
+       unsigned int count = FIELD_GET(GENMASK(3, 0), bus_instance);
+
+       dev_info(&hci->master.dev, "%d bus instances\n", count);
+       return 0;
+}
+
+static int hci_extcap_xfer_modes(struct i3c_hci *hci, void __iomem *base)
+{
+       u32 header = readl(base);
+       u32 entries = FIELD_GET(CAP_HEADER_LENGTH, header) - 1;
+       unsigned int index;
+
+       dev_info(&hci->master.dev, "transfer mode table has %d entries\n",
+                entries);
+       base += 4;  /* skip header */
+       for (index = 0; index < entries; index++) {
+               u32 mode_entry = readl(base);
+
+               DBG("mode %d: 0x%08x", index, mode_entry);
+               /* TODO: will be needed when I3C core does more than SDR */
+               base += 4;
+       }
+
+       return 0;
+}
+
+static int hci_extcap_xfer_rates(struct i3c_hci *hci, void __iomem *base)
+{
+       u32 header = readl(base);
+       u32 entries = FIELD_GET(CAP_HEADER_LENGTH, header) - 1;
+       u32 rate_entry;
+       unsigned int index, rate, rate_id, mode_id;
+
+       base += 4;  /* skip header */
+
+       dev_info(&hci->master.dev, "available data rates:\n");
+       for (index = 0; index < entries; index++) {
+               rate_entry = readl(base);
+               DBG("entry %d: 0x%08x", index, rate_entry);
+               rate = FIELD_GET(XFERRATE_ACTUAL_RATE_KHZ, rate_entry);
+               rate_id = FIELD_GET(XFERRATE_RATE_ID, rate_entry);
+               mode_id = FIELD_GET(XFERRATE_MODE_ID, rate_entry);
+               dev_info(&hci->master.dev, "rate %d for %s = %d kHz\n",
+                        rate_id,
+                        mode_id == XFERRATE_MODE_I3C ? "I3C" :
+                        mode_id == XFERRATE_MODE_I2C ? "I2C" :
+                        "unknown mode",
+                        rate);
+               base += 4;
+       }
+
+       return 0;
+}
+
+static int hci_extcap_auto_command(struct i3c_hci *hci, void __iomem *base)
+{
+       u32 autocmd_ext_caps = readl(base + 0x04);
+       unsigned int max_count = FIELD_GET(GENMASK(3, 0), autocmd_ext_caps);
+       u32 autocmd_ext_config = readl(base + 0x08);
+       unsigned int count = FIELD_GET(GENMASK(3, 0), autocmd_ext_config);
+
+       dev_info(&hci->master.dev, "%d/%d active auto-command entries\n",
+                count, max_count);
+       /* remember auto-command register location for later use */
+       hci->AUTOCMD_regs = base;
+       return 0;
+}
+
+static int hci_extcap_debug(struct i3c_hci *hci, void __iomem *base)
+{
+       dev_info(&hci->master.dev, "debug registers present\n");
+       hci->DEBUG_regs = base;
+       return 0;
+}
+
+static int hci_extcap_scheduled_cmd(struct i3c_hci *hci, void __iomem *base)
+{
+       dev_info(&hci->master.dev, "scheduled commands available\n");
+       /* hci->schedcmd_regs = base; */
+       return 0;
+}
+
+static int hci_extcap_non_curr_master(struct i3c_hci *hci, void __iomem *base)
+{
+       dev_info(&hci->master.dev, "Non-Current Master support available\n");
+       /* hci->NCM_regs = base; */
+       return 0;
+}
+
+static int hci_extcap_ccc_resp_conf(struct i3c_hci *hci, void __iomem *base)
+{
+       dev_info(&hci->master.dev, "CCC Response Configuration available\n");
+       return 0;
+}
+
+static int hci_extcap_global_DAT(struct i3c_hci *hci, void __iomem *base)
+{
+       dev_info(&hci->master.dev, "Global DAT available\n");
+       return 0;
+}
+
+static int hci_extcap_multilane(struct i3c_hci *hci, void __iomem *base)
+{
+       dev_info(&hci->master.dev, "Master Multi-Lane support available\n");
+       return 0;
+}
+
+static int hci_extcap_ncm_multilane(struct i3c_hci *hci, void __iomem *base)
+{
+       dev_info(&hci->master.dev, "NCM Multi-Lane support available\n");
+       return 0;
+}
+
+struct hci_ext_caps {
+       u8  id;
+       u16 min_length;
+       int (*parser)(struct i3c_hci *hci, void __iomem *base);
+};
+
+#define EXT_CAP(_id, _highest_mandatory_reg_offset, _parser) \
+       { .id = (_id), .parser = (_parser), \
+         .min_length = (_highest_mandatory_reg_offset)/4 + 1 }
+
+static const struct hci_ext_caps ext_capabilities[] = {
+       EXT_CAP(0x01, 0x0c, hci_extcap_hardware_id),
+       EXT_CAP(0x02, 0x04, hci_extcap_master_config),
+       EXT_CAP(0x03, 0x04, hci_extcap_multi_bus),
+       EXT_CAP(0x04, 0x24, hci_extcap_xfer_modes),
+       EXT_CAP(0x05, 0x08, hci_extcap_auto_command),
+       EXT_CAP(0x08, 0x40, hci_extcap_xfer_rates),
+       EXT_CAP(0x0c, 0x10, hci_extcap_debug),
+       EXT_CAP(0x0d, 0x0c, hci_extcap_scheduled_cmd),
+       EXT_CAP(0x0e, 0x80, hci_extcap_non_curr_master), /* TODO confirm size */
+       EXT_CAP(0x0f, 0x04, hci_extcap_ccc_resp_conf),
+       EXT_CAP(0x10, 0x08, hci_extcap_global_DAT),
+       EXT_CAP(0x9d, 0x04, hci_extcap_multilane),
+       EXT_CAP(0x9e, 0x04, hci_extcap_ncm_multilane),
+};
+
+static int hci_extcap_vendor_NXP(struct i3c_hci *hci, void __iomem *base)
+{
+       hci->vendor_data = (__force void *)base;
+       dev_info(&hci->master.dev, "Build Date Info = %#x\n", readl(base + 1*4));
+       /* reset the FPGA */
+       writel(0xdeadbeef, base + 1*4);
+       return 0;
+}
+
+struct hci_ext_cap_vendor_specific {
+       u32 vendor;
+       u8  cap;
+       u16 min_length;
+       int (*parser)(struct i3c_hci *hci, void __iomem *base);
+};
+
+#define EXT_CAP_VENDOR(_vendor, _cap, _highest_mandatory_reg_offset) \
+       { .vendor = (MIPI_VENDOR_##_vendor), .cap = (_cap), \
+         .parser = (hci_extcap_vendor_##_vendor), \
+         .min_length = (_highest_mandatory_reg_offset)/4 + 1 }
+
+static const struct hci_ext_cap_vendor_specific vendor_ext_caps[] = {
+       EXT_CAP_VENDOR(NXP, 0xc0, 0x20),
+};
+
+static int hci_extcap_vendor_specific(struct i3c_hci *hci, void __iomem *base,
+                                     u32 cap_id, u32 cap_length)
+{
+       const struct hci_ext_cap_vendor_specific *vendor_cap_entry;
+       int i;
+
+       vendor_cap_entry = NULL;
+       for (i = 0; i < ARRAY_SIZE(vendor_ext_caps); i++) {
+               if (vendor_ext_caps[i].vendor == hci->vendor_mipi_id &&
+                   vendor_ext_caps[i].cap == cap_id) {
+                       vendor_cap_entry = &vendor_ext_caps[i];
+                       break;
+               }
+       }
+
+       if (!vendor_cap_entry) {
+               dev_notice(&hci->master.dev,
+                          "unknown ext_cap 0x%02x for vendor 0x%02x\n",
+                          cap_id, hci->vendor_mipi_id);
+               return 0;
+       }
+       if (cap_length < vendor_cap_entry->min_length) {
+               dev_err(&hci->master.dev,
+                       "ext_cap 0x%02x has size %d (expecting >= %d)\n",
+                       cap_id, cap_length, vendor_cap_entry->min_length);
+               return -EINVAL;
+       }
+       return vendor_cap_entry->parser(hci, base);
+}
+
+int i3c_hci_parse_ext_caps(struct i3c_hci *hci)
+{
+       void __iomem *curr_cap = hci->EXTCAPS_regs;
+       void __iomem *end = curr_cap + 0x1000; /* some arbitrary limit */
+       u32 cap_header, cap_id, cap_length;
+       const struct hci_ext_caps *cap_entry;
+       int i, err = 0;
+
+       if (!curr_cap)
+               return 0;
+
+       for (; !err && curr_cap < end; curr_cap += cap_length * 4) {
+               cap_header = readl(curr_cap);
+               cap_id = FIELD_GET(CAP_HEADER_ID, cap_header);
+               cap_length = FIELD_GET(CAP_HEADER_LENGTH, cap_header);
+               DBG("id=0x%02x length=%d", cap_id, cap_length);
+               if (!cap_length)
+                       break;
+               if (curr_cap + cap_length * 4 >= end) {
+                       dev_err(&hci->master.dev,
+                               "ext_cap 0x%02x has size %d (too big)\n",
+                               cap_id, cap_length);
+                       err = -EINVAL;
+                       break;
+               }
+
+               if (cap_id >= 0xc0 && cap_id <= 0xcf) {
+                       err = hci_extcap_vendor_specific(hci, curr_cap,
+                                                        cap_id, cap_length);
+                       continue;
+               }
+
+               cap_entry = NULL;
+               for (i = 0; i < ARRAY_SIZE(ext_capabilities); i++) {
+                       if (ext_capabilities[i].id == cap_id) {
+                               cap_entry = &ext_capabilities[i];
+                               break;
+                       }
+               }
+               if (!cap_entry) {
+                       dev_notice(&hci->master.dev,
+                                  "unknown ext_cap 0x%02x\n", cap_id);
+               } else if (cap_length < cap_entry->min_length) {
+                       dev_err(&hci->master.dev,
+                               "ext_cap 0x%02x has size %d (expecting >= %d)\n",
+                               cap_id, cap_length, cap_entry->min_length);
+                       err = -EINVAL;
+               } else {
+                       err = cap_entry->parser(hci, curr_cap);
+               }
+       }
+       return err;
+}
diff --git a/drivers/i3c/master/mipi-i3c-hci/ext_caps.h b/drivers/i3c/master/mipi-i3c-hci/ext_caps.h
new file mode 100644 (file)
index 0000000..9df1782
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * Extended Capability Definitions
+ */
+
+#ifndef EXTCAPS_H
+#define EXTCAPS_H
+
+/* MIPI vendor IDs */
+#define MIPI_VENDOR_NXP                        0x11b
+
+
+int i3c_hci_parse_ext_caps(struct i3c_hci *hci);
+
+#endif
diff --git a/drivers/i3c/master/mipi-i3c-hci/hci.h b/drivers/i3c/master/mipi-i3c-hci/hci.h
new file mode 100644 (file)
index 0000000..80beb1d
--- /dev/null
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * Common HCI stuff
+ */
+
+#ifndef HCI_H
+#define HCI_H
+
+
+/* Handy logging macro to save on line length */
+#define DBG(x, ...) pr_devel("%s: " x "\n", __func__, ##__VA_ARGS__)
+
+/* 32-bit word aware bit and mask macros */
+#define W0_MASK(h, l)  GENMASK((h) - 0,  (l) - 0)
+#define W1_MASK(h, l)  GENMASK((h) - 32, (l) - 32)
+#define W2_MASK(h, l)  GENMASK((h) - 64, (l) - 64)
+#define W3_MASK(h, l)  GENMASK((h) - 96, (l) - 96)
+
+/* Same for single bit macros (trailing _ to align with W*_MASK width) */
+#define W0_BIT_(x)  BIT((x) - 0)
+#define W1_BIT_(x)  BIT((x) - 32)
+#define W2_BIT_(x)  BIT((x) - 64)
+#define W3_BIT_(x)  BIT((x) - 96)
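+
+/*
+ * Illustrative use: a field documented at descriptor bits 47:40 lives in
+ * the second 32-bit word, so it is declared with W1_MASK(47, 40), which
+ * expands to GENMASK(15, 8) within that word.
+ */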
+
+
+struct hci_cmd_ops;
+
+/* Our main structure */
+struct i3c_hci {
+       struct i3c_master_controller master;
+       void __iomem *base_regs;
+       void __iomem *DAT_regs;
+       void __iomem *DCT_regs;
+       void __iomem *RHS_regs;
+       void __iomem *PIO_regs;
+       void __iomem *EXTCAPS_regs;
+       void __iomem *AUTOCMD_regs;
+       void __iomem *DEBUG_regs;
+       const struct hci_io_ops *io;
+       void *io_data;
+       const struct hci_cmd_ops *cmd;
+       atomic_t next_cmd_tid;
+       u32 caps;
+       unsigned int quirks;
+       unsigned int DAT_entries;
+       unsigned int DAT_entry_size;
+       void *DAT_data;
+       unsigned int DCT_entries;
+       unsigned int DCT_entry_size;
+       u8 version_major;
+       u8 version_minor;
+       u8 revision;
+       u32 vendor_mipi_id;
+       u32 vendor_version_id;
+       u32 vendor_product_id;
+       void *vendor_data;
+};
+
+
+/*
+ * Structure to represent a master initiated transfer.
+ * The rnw, data and data_len fields must be initialized before calling any
+ * hci->cmd->*() method. The cmd method will initialize cmd_desc[] and
+ * possibly modify (clear) the data field. Then xfer->cmd_desc[0] can
+ * be augmented with CMD_0_ROC and/or CMD_0_TOC.
+ * The completion field needs to be initialized before queueing with
+ * hci->io->queue_xfer(), and requires CMD_0_ROC to be set.
+ */
+struct hci_xfer {
+       u32 cmd_desc[4];
+       u32 response;
+       bool rnw;
+       void *data;
+       unsigned int data_len;
+       unsigned int cmd_tid;
+       struct completion *completion;
+       union {
+               struct {
+                       /* PIO specific */
+                       struct hci_xfer *next_xfer;
+                       struct hci_xfer *next_data;
+                       struct hci_xfer *next_resp;
+                       unsigned int data_left;
+                       u32 data_word_before_partial;
+               };
+               struct {
+                       /* DMA specific */
+                       dma_addr_t data_dma;
+                       int ring_number;
+                       int ring_entry;
+               };
+       };
+};
+
+static inline struct hci_xfer *hci_alloc_xfer(unsigned int n)
+{
+       return kzalloc(sizeof(struct hci_xfer) * n, GFP_KERNEL);
+}
+
+static inline void hci_free_xfer(struct hci_xfer *xfer, unsigned int n)
+{
+       kfree(xfer);
+}
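+
+/*
+ * Minimal usage sketch for the above (illustrative only; the concrete
+ * hci->cmd->*() entry point depends on the transfer type and is declared
+ * in cmd.h):
+ *
+ *     struct hci_xfer *xfer = hci_alloc_xfer(1);
+ *     DECLARE_COMPLETION_ONSTACK(done);
+ *
+ *     xfer->rnw = true;
+ *     xfer->data = buf;
+ *     xfer->data_len = len;
+ *     (have the appropriate hci->cmd->*() method fill cmd_desc[] here)
+ *     xfer->cmd_desc[0] |= CMD_0_ROC | CMD_0_TOC;
+ *     xfer->completion = &done;
+ *     if (!hci->io->queue_xfer(hci, xfer, 1))
+ *             wait_for_completion(&done);
+ *     hci_free_xfer(xfer, 1);
+ */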
+
+
+/* This abstracts PIO vs DMA operations */
+struct hci_io_ops {
+       bool (*irq_handler)(struct i3c_hci *hci, unsigned int mask);
+       int (*queue_xfer)(struct i3c_hci *hci, struct hci_xfer *xfer, int n);
+       bool (*dequeue_xfer)(struct i3c_hci *hci, struct hci_xfer *xfer, int n);
+       int (*request_ibi)(struct i3c_hci *hci, struct i3c_dev_desc *dev,
+                          const struct i3c_ibi_setup *req);
+       void (*free_ibi)(struct i3c_hci *hci, struct i3c_dev_desc *dev);
+       void (*recycle_ibi_slot)(struct i3c_hci *hci, struct i3c_dev_desc *dev,
+                               struct i3c_ibi_slot *slot);
+       int (*init)(struct i3c_hci *hci);
+       void (*cleanup)(struct i3c_hci *hci);
+};
+
+extern const struct hci_io_ops mipi_i3c_hci_pio;
+extern const struct hci_io_ops mipi_i3c_hci_dma;
+
+
+/* Our per device master private data */
+struct i3c_hci_dev_data {
+       int dat_idx;
+       void *ibi_data;
+};
+
+
+/* list of quirks */
+#define HCI_QUIRK_RAW_CCC      BIT(1)  /* CCC framing must be explicit */
+
+
+/* global functions */
+void mipi_i3c_hci_resume(struct i3c_hci *hci);
+void mipi_i3c_hci_pio_reset(struct i3c_hci *hci);
+void mipi_i3c_hci_dct_index_reset(struct i3c_hci *hci);
+
+#endif
diff --git a/drivers/i3c/master/mipi-i3c-hci/ibi.h b/drivers/i3c/master/mipi-i3c-hci/ibi.h
new file mode 100644 (file)
index 0000000..e1f98e2
--- /dev/null
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * Common IBI related stuff
+ */
+
+#ifndef IBI_H
+#define IBI_H
+
+/*
+ * IBI Status Descriptor bits
+ */
+#define IBI_STS                                BIT(31)
+#define IBI_ERROR                      BIT(30)
+#define IBI_STATUS_TYPE                        BIT(29)
+#define IBI_HW_CONTEXT                 GENMASK(28, 26)
+#define IBI_TS                         BIT(25)
+#define IBI_LAST_STATUS                        BIT(24)
+#define IBI_CHUNKS                     GENMASK(23, 16)
+#define IBI_ID                         GENMASK(15, 8)
+#define IBI_TARGET_ADDR                        GENMASK(15, 9)
+#define IBI_TARGET_RNW                 BIT(8)
+#define IBI_DATA_LENGTH                        GENMASK(7, 0)
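+
+/*
+ * Example decode (status value assumed): 0x01020946 has IBI_LAST_STATUS
+ * set, IBI_CHUNKS == 2, IBI_TARGET_ADDR == 0x04, IBI_TARGET_RNW set and
+ * IBI_DATA_LENGTH == 70, i.e. the final segment of a read-type IBI.
+ */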
+
+/*  handy helpers */
+static inline struct i3c_dev_desc *
+i3c_hci_addr_to_dev(struct i3c_hci *hci, unsigned int addr)
+{
+       struct i3c_bus *bus = i3c_master_get_bus(&hci->master);
+       struct i3c_dev_desc *dev;
+
+       i3c_bus_for_each_i3cdev(bus, dev) {
+               if (dev->info.dyn_addr == addr)
+                       return dev;
+       }
+       return NULL;
+}
+
+#endif
diff --git a/drivers/i3c/master/mipi-i3c-hci/pio.c b/drivers/i3c/master/mipi-i3c-hci/pio.c
new file mode 100644 (file)
index 0000000..d0272aa
--- /dev/null
@@ -0,0 +1,1041 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/i3c/master.h>
+#include <linux/io.h>
+
+#include "hci.h"
+#include "cmd.h"
+#include "ibi.h"
+
+
+/*
+ * PIO Access Area
+ */
+
+#define pio_reg_read(r)                readl(hci->PIO_regs + (PIO_##r))
+#define pio_reg_write(r, v)    writel(v, hci->PIO_regs + (PIO_##r))
+
+#define PIO_COMMAND_QUEUE_PORT         0x00
+#define PIO_RESPONSE_QUEUE_PORT                0x04
+#define PIO_XFER_DATA_PORT             0x08
+#define PIO_IBI_PORT                   0x0c
+
+#define PIO_QUEUE_THLD_CTRL            0x10
+#define QUEUE_IBI_STATUS_THLD          GENMASK(31, 24)
+#define QUEUE_IBI_DATA_THLD            GENMASK(23, 16)
+#define QUEUE_RESP_BUF_THLD            GENMASK(15, 8)
+#define QUEUE_CMD_EMPTY_BUF_THLD       GENMASK(7, 0)
+
+#define PIO_DATA_BUFFER_THLD_CTRL      0x14
+#define DATA_RX_START_THLD             GENMASK(26, 24)
+#define DATA_TX_START_THLD             GENMASK(18, 16)
+#define DATA_RX_BUF_THLD               GENMASK(10, 8)
+#define DATA_TX_BUF_THLD               GENMASK(2, 0)
+
+#define PIO_QUEUE_SIZE                 0x18
+#define TX_DATA_BUFFER_SIZE            GENMASK(31, 24)
+#define RX_DATA_BUFFER_SIZE            GENMASK(23, 16)
+#define IBI_STATUS_SIZE                        GENMASK(15, 8)
+#define CR_QUEUE_SIZE                  GENMASK(7, 0)
+
+#define PIO_INTR_STATUS                        0x20
+#define PIO_INTR_STATUS_ENABLE         0x24
+#define PIO_INTR_SIGNAL_ENABLE         0x28
+#define PIO_INTR_FORCE                 0x2c
+#define STAT_TRANSFER_BLOCKED          BIT(25)
+#define STAT_PERR_RESP_UFLOW           BIT(24)
+#define STAT_PERR_CMD_OFLOW            BIT(23)
+#define STAT_PERR_IBI_UFLOW            BIT(22)
+#define STAT_PERR_RX_UFLOW             BIT(21)
+#define STAT_PERR_TX_OFLOW             BIT(20)
+#define STAT_ERR_RESP_QUEUE_FULL       BIT(19)
+#define STAT_WARN_RESP_QUEUE_FULL      BIT(18)
+#define STAT_ERR_IBI_QUEUE_FULL                BIT(17)
+#define STAT_WARN_IBI_QUEUE_FULL       BIT(16)
+#define STAT_ERR_RX_DATA_FULL          BIT(15)
+#define STAT_WARN_RX_DATA_FULL         BIT(14)
+#define STAT_ERR_TX_DATA_EMPTY         BIT(13)
+#define STAT_WARN_TX_DATA_EMPTY                BIT(12)
+#define STAT_TRANSFER_ERR              BIT(9)
+#define STAT_WARN_INS_STOP_MODE                BIT(7)
+#define STAT_TRANSFER_ABORT            BIT(5)
+#define STAT_RESP_READY                        BIT(4)
+#define STAT_CMD_QUEUE_READY           BIT(3)
+#define STAT_IBI_STATUS_THLD           BIT(2)
+#define STAT_RX_THLD                   BIT(1)
+#define STAT_TX_THLD                   BIT(0)
+
+#define PIO_QUEUE_CUR_STATUS           0x38
+#define CUR_IBI_Q_LEVEL                        GENMASK(28, 20)
+#define CUR_RESP_Q_LEVEL               GENMASK(18, 10)
+#define CUR_CMD_Q_EMPTY_LEVEL          GENMASK(8, 0)
+
+#define PIO_DATA_BUFFER_CUR_STATUS     0x3c
+#define CUR_RX_BUF_LVL                 GENMASK(26, 16)
+#define CUR_TX_BUF_LVL                 GENMASK(10, 0)
+
+/*
+ * Handy status bit combinations
+ */
+
+#define STAT_LATENCY_WARNINGS          (STAT_WARN_RESP_QUEUE_FULL | \
+                                        STAT_WARN_IBI_QUEUE_FULL | \
+                                        STAT_WARN_RX_DATA_FULL | \
+                                        STAT_WARN_TX_DATA_EMPTY | \
+                                        STAT_WARN_INS_STOP_MODE)
+
+#define STAT_LATENCY_ERRORS            (STAT_ERR_RESP_QUEUE_FULL | \
+                                        STAT_ERR_IBI_QUEUE_FULL | \
+                                        STAT_ERR_RX_DATA_FULL | \
+                                        STAT_ERR_TX_DATA_EMPTY)
+
+#define STAT_PROG_ERRORS               (STAT_TRANSFER_BLOCKED | \
+                                        STAT_PERR_RESP_UFLOW | \
+                                        STAT_PERR_CMD_OFLOW | \
+                                        STAT_PERR_IBI_UFLOW | \
+                                        STAT_PERR_RX_UFLOW | \
+                                        STAT_PERR_TX_OFLOW)
+
+#define STAT_ALL_ERRORS                        (STAT_TRANSFER_ABORT | \
+                                        STAT_TRANSFER_ERR | \
+                                        STAT_LATENCY_ERRORS | \
+                                        STAT_PROG_ERRORS)
+
+struct hci_pio_dev_ibi_data {
+       struct i3c_generic_ibi_pool *pool;
+       unsigned int max_len;
+};
+
+struct hci_pio_ibi_data {
+       struct i3c_ibi_slot *slot;
+       void *data_ptr;
+       unsigned int addr;
+       unsigned int seg_len, seg_cnt;
+       unsigned int max_len;
+       bool last_seg;
+};
+
+struct hci_pio_data {
+       spinlock_t lock;
+       struct hci_xfer *curr_xfer, *xfer_queue;
+       struct hci_xfer *curr_rx, *rx_queue;
+       struct hci_xfer *curr_tx, *tx_queue;
+       struct hci_xfer *curr_resp, *resp_queue;
+       struct hci_pio_ibi_data ibi;
+       unsigned int rx_thresh_size, tx_thresh_size;
+       unsigned int max_ibi_thresh;
+       u32 reg_queue_thresh;
+       u32 enabled_irqs;
+};
+
+static int hci_pio_init(struct i3c_hci *hci)
+{
+       struct hci_pio_data *pio;
+       u32 val, size_val, rx_thresh, tx_thresh, ibi_val;
+
+       pio = kzalloc(sizeof(*pio), GFP_KERNEL);
+       if (!pio)
+               return -ENOMEM;
+
+       hci->io_data = pio;
+       spin_lock_init(&pio->lock);
+
+       size_val = pio_reg_read(QUEUE_SIZE);
+       dev_info(&hci->master.dev, "CMD/RESP FIFO = %ld entries\n",
+                FIELD_GET(CR_QUEUE_SIZE, size_val));
+       dev_info(&hci->master.dev, "IBI FIFO = %ld bytes\n",
+                4 * FIELD_GET(IBI_STATUS_SIZE, size_val));
+       dev_info(&hci->master.dev, "RX data FIFO = %d bytes\n",
+                4 * (2 << FIELD_GET(RX_DATA_BUFFER_SIZE, size_val)));
+       dev_info(&hci->master.dev, "TX data FIFO = %d bytes\n",
+                4 * (2 << FIELD_GET(TX_DATA_BUFFER_SIZE, size_val)));
+
+       /*
+        * Let's initialize data thresholds to half of the actual FIFO size.
+        * The start thresholds aren't used (set to 0) as the FIFO is always
+        * serviced before the corresponding command is queued.
+        */
+       rx_thresh = FIELD_GET(RX_DATA_BUFFER_SIZE, size_val);
+       tx_thresh = FIELD_GET(TX_DATA_BUFFER_SIZE, size_val);
+       if (hci->version_major == 1) {
+               /* those are expressed as 2^(n+1), so just sub 1 if not 0 */
+               if (rx_thresh)
+                       rx_thresh -= 1;
+               if (tx_thresh)
+                       tx_thresh -= 1;
+               pio->rx_thresh_size = 2 << rx_thresh;
+               pio->tx_thresh_size = 2 << tx_thresh;
+       } else {
+               /* size is 2^(n+1) and threshold is 2^n i.e. already halved */
+               pio->rx_thresh_size = 1 << rx_thresh;
+               pio->tx_thresh_size = 1 << tx_thresh;
+       }
+       val = FIELD_PREP(DATA_RX_BUF_THLD,   rx_thresh) |
+             FIELD_PREP(DATA_TX_BUF_THLD,   tx_thresh);
+       pio_reg_write(DATA_BUFFER_THLD_CTRL, val);
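+
+       /*
+        * Worked example (register value assumed, HCI v1): an
+        * RX_DATA_BUFFER_SIZE field of 3 means a 2^(3+1) == 16-word FIFO;
+        * the code above then programs a threshold code of 2, i.e.
+        * 2 << 2 == 8 words, half the FIFO as intended.
+        */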
+
+       /*
+        * Let's raise an interrupt as soon as there is one free cmd slot
+        * or one available response or IBI. For IBI data let's use half the
+        * IBI queue size within allowed bounds.
+        */
+       ibi_val = FIELD_GET(IBI_STATUS_SIZE, size_val);
+       pio->max_ibi_thresh = clamp_val(ibi_val/2, 1, 63);
+       val = FIELD_PREP(QUEUE_IBI_STATUS_THLD, 1) |
+             FIELD_PREP(QUEUE_IBI_DATA_THLD, pio->max_ibi_thresh) |
+             FIELD_PREP(QUEUE_RESP_BUF_THLD, 1) |
+             FIELD_PREP(QUEUE_CMD_EMPTY_BUF_THLD, 1);
+       pio_reg_write(QUEUE_THLD_CTRL, val);
+       pio->reg_queue_thresh = val;
+
+       /* Disable all IRQs but allow all status bits */
+       pio_reg_write(INTR_SIGNAL_ENABLE, 0x0);
+       pio_reg_write(INTR_STATUS_ENABLE, 0xffffffff);
+
+       /* Always accept error interrupts (will be activated on first xfer) */
+       pio->enabled_irqs = STAT_ALL_ERRORS;
+
+       return 0;
+}
+
+static void hci_pio_cleanup(struct i3c_hci *hci)
+{
+       struct hci_pio_data *pio = hci->io_data;
+
+       pio_reg_write(INTR_SIGNAL_ENABLE, 0x0);
+
+       if (pio) {
+               DBG("status = %#x/%#x",
+                   pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE));
+               BUG_ON(pio->curr_xfer);
+               BUG_ON(pio->curr_rx);
+               BUG_ON(pio->curr_tx);
+               BUG_ON(pio->curr_resp);
+               kfree(pio);
+               hci->io_data = NULL;
+       }
+}
+
+static void hci_pio_write_cmd(struct i3c_hci *hci, struct hci_xfer *xfer)
+{
+       DBG("cmd_desc[%d] = 0x%08x", 0, xfer->cmd_desc[0]);
+       DBG("cmd_desc[%d] = 0x%08x", 1, xfer->cmd_desc[1]);
+       pio_reg_write(COMMAND_QUEUE_PORT, xfer->cmd_desc[0]);
+       pio_reg_write(COMMAND_QUEUE_PORT, xfer->cmd_desc[1]);
+       if (hci->cmd == &mipi_i3c_hci_cmd_v2) {
+               DBG("cmd_desc[%d] = 0x%08x", 2, xfer->cmd_desc[2]);
+               DBG("cmd_desc[%d] = 0x%08x", 3, xfer->cmd_desc[3]);
+               pio_reg_write(COMMAND_QUEUE_PORT, xfer->cmd_desc[2]);
+               pio_reg_write(COMMAND_QUEUE_PORT, xfer->cmd_desc[3]);
+       }
+}
+
+static bool hci_pio_do_rx(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       struct hci_xfer *xfer = pio->curr_rx;
+       unsigned int nr_words;
+       u32 *p;
+
+       p = xfer->data;
+       p += (xfer->data_len - xfer->data_left) / 4;
+
+       while (xfer->data_left >= 4) {
+               /* bail out if FIFO hasn't reached the threshold value yet */
+               if (!(pio_reg_read(INTR_STATUS) & STAT_RX_THLD))
+                       return false;
+               nr_words = min(xfer->data_left / 4, pio->rx_thresh_size);
+               /* extract data from FIFO */
+               xfer->data_left -= nr_words * 4;
+               DBG("now %d left %d", nr_words * 4, xfer->data_left);
+               while (nr_words--)
+                       *p++ = pio_reg_read(XFER_DATA_PORT);
+       }
+
+       /* trailing data is retrieved upon response reception */
+       return !xfer->data_left;
+}
+
+static void hci_pio_do_trailing_rx(struct i3c_hci *hci,
+                                  struct hci_pio_data *pio, unsigned int count)
+{
+       struct hci_xfer *xfer = pio->curr_rx;
+       u32 *p;
+
+       DBG("%d remaining", count);
+
+       p = xfer->data;
+       p += (xfer->data_len - xfer->data_left) / 4;
+
+       if (count >= 4) {
+               unsigned int nr_words = count / 4;
+               /* extract data from FIFO */
+               xfer->data_left -= nr_words * 4;
+               DBG("now %d left %d", nr_words * 4, xfer->data_left);
+               while (nr_words--)
+                       *p++ = pio_reg_read(XFER_DATA_PORT);
+       }
+
+       count &= 3;
+       if (count) {
+               /*
+                * There are trailing bytes in the last word.
+                * Fetch it and extract bytes in an endian independent way.
+                * Unlike the TX case, we must not write memory past the
+                * end of the destination buffer.
+                */
+               u8 *p_byte = (u8 *)p;
+               u32 data = pio_reg_read(XFER_DATA_PORT);
+
+               xfer->data_word_before_partial = data;
+               xfer->data_left -= count;
+               data = (__force u32) cpu_to_le32(data);
+               while (count--) {
+                       *p_byte++ = data;
+                       data >>= 8;
+               }
+       }
+}
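+
+/*
+ * Example of the partial-word extraction above (illustrative, assuming a
+ * little-endian host where cpu_to_le32() is a no-op): a FIFO word of
+ * 0x44332211 with count == 2 stores 0x11 then 0x22 into the buffer, while
+ * the whole word is stashed in data_word_before_partial for possible use
+ * by hci_pio_push_to_next_rx().
+ */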
+
+static bool hci_pio_do_tx(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       struct hci_xfer *xfer = pio->curr_tx;
+       unsigned int nr_words;
+       u32 *p;
+
+       p = xfer->data;
+       p += (xfer->data_len - xfer->data_left) / 4;
+
+       while (xfer->data_left >= 4) {
+               /* bail out if FIFO free space is below set threshold */
+               if (!(pio_reg_read(INTR_STATUS) & STAT_TX_THLD))
+                       return false;
+               /* we can fill up to that TX threshold */
+               nr_words = min(xfer->data_left / 4, pio->tx_thresh_size);
+               /* push data into the FIFO */
+               xfer->data_left -= nr_words * 4;
+               DBG("now %d left %d", nr_words * 4, xfer->data_left);
+               while (nr_words--)
+                       pio_reg_write(XFER_DATA_PORT, *p++);
+       }
+
+       if (xfer->data_left) {
+               /*
+                * There are trailing bytes to send. We can simply load
+                * them from memory as a word which will keep those bytes
+                * in their proper place even on a BE system. This will
+                * also get some bytes past the actual buffer but no one
+                * should care as they won't be sent out.
+                */
+               if (!(pio_reg_read(INTR_STATUS) & STAT_TX_THLD))
+                       return false;
+               DBG("trailing %d", xfer->data_left);
+               pio_reg_write(XFER_DATA_PORT, *p);
+               xfer->data_left = 0;
+       }
+
+       return true;
+}
+
+static bool hci_pio_process_rx(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       while (pio->curr_rx && hci_pio_do_rx(hci, pio))
+               pio->curr_rx = pio->curr_rx->next_data;
+       return !pio->curr_rx;
+}
+
+static bool hci_pio_process_tx(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       while (pio->curr_tx && hci_pio_do_tx(hci, pio))
+               pio->curr_tx = pio->curr_tx->next_data;
+       return !pio->curr_tx;
+}
+
+static void hci_pio_queue_data(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       struct hci_xfer *xfer = pio->curr_xfer;
+       struct hci_xfer *prev_queue_tail;
+
+       if (!xfer->data) {
+               xfer->data_len = xfer->data_left = 0;
+               return;
+       }
+
+       if (xfer->rnw) {
+               prev_queue_tail = pio->rx_queue;
+               pio->rx_queue = xfer;
+               if (pio->curr_rx) {
+                       prev_queue_tail->next_data = xfer;
+               } else {
+                       pio->curr_rx = xfer;
+                       if (!hci_pio_process_rx(hci, pio))
+                               pio->enabled_irqs |= STAT_RX_THLD;
+               }
+       } else {
+               prev_queue_tail = pio->tx_queue;
+               pio->tx_queue = xfer;
+               if (pio->curr_tx) {
+                       prev_queue_tail->next_data = xfer;
+               } else {
+                       pio->curr_tx = xfer;
+                       if (!hci_pio_process_tx(hci, pio))
+                               pio->enabled_irqs |= STAT_TX_THLD;
+               }
+       }
+}
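
Both branches of hci_pio_queue_data() use the same O(1) tail-append idiom, so it is worth spelling out once. A reduced sketch with illustrative type names, assuming the driver's lock is held by the caller:

struct entry { struct entry *next; };

struct queue {
	struct entry *curr;	/* entry being processed, NULL when idle */
	struct entry *tail;	/* most recently queued entry */
};

/* Sketch of the enqueue idiom above. A stale 'tail' while idle is
 * harmless because it is only dereferenced when 'curr' is set. */
static void enqueue(struct queue *q, struct entry *e)
{
	struct entry *prev_tail = q->tail;

	q->tail = e;
	if (q->curr)
		prev_tail->next = e;	/* append behind in-flight work */
	else
		q->curr = e;		/* idle: start with this entry */
}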
+
+static void hci_pio_push_to_next_rx(struct i3c_hci *hci, struct hci_xfer *xfer,
+                                   unsigned int words_to_keep)
+{
+       u32 *from = xfer->data;
+       u32 from_last;
+       unsigned int received, count;
+
+       received = (xfer->data_len - xfer->data_left) / 4;
+       if ((xfer->data_len - xfer->data_left) & 3) {
+               from_last = xfer->data_word_before_partial;
+               received += 1;
+       } else {
+               from_last = from[received];
+       }
+       from += words_to_keep;
+       count = received - words_to_keep;
+
+       while (count) {
+               unsigned int room, left, chunk, bytes_to_move;
+               u32 last_word;
+
+               xfer = xfer->next_data;
+               if (!xfer) {
+                       dev_err(&hci->master.dev, "pushing RX data to nonexistent xfer\n");
+                       return;
+               }
+
+               room = DIV_ROUND_UP(xfer->data_len, 4);
+               left = DIV_ROUND_UP(xfer->data_left, 4);
+               chunk = min(count, room);
+               if (chunk > left) {
+                       hci_pio_push_to_next_rx(hci, xfer, chunk - left);
+                       left = chunk;
+                       xfer->data_left = left * 4;
+               }
+
+               bytes_to_move = xfer->data_len - xfer->data_left;
+               if (bytes_to_move & 3) {
+                       /* preserve the word about to become partial */
+                       u32 *p = xfer->data;
+
+                       xfer->data_word_before_partial = p[bytes_to_move / 4];
+               }
+               memmove(xfer->data + chunk, xfer->data, bytes_to_move);
+
+               /* treat last word specially because of partial word issues */
+               chunk -= 1;
+
+               memcpy(xfer->data, from, chunk * 4);
+               xfer->data_left -= chunk * 4;
+               from += chunk;
+               count -= chunk;
+
+               last_word = (count == 1) ? from_last : *from++;
+               if (xfer->data_left < 4) {
+                       /*
+                        * Like in hci_pio_do_trailing_rx(), preserve the
+                        * original word to be stored partially, then store
+                        * its bytes in an endian independent way.
+                        */
+                       u8 *p_byte = xfer->data;
+
+                       p_byte += chunk * 4;
+                       xfer->data_word_before_partial = last_word;
+                       last_word = (__force u32) cpu_to_le32(last_word);
+                       while (xfer->data_left--) {
+                               *p_byte++ = last_word;
+                               last_word >>= 8;
+                       }
+               } else {
+                       u32 *p = xfer->data;
+
+                       p[chunk] = last_word;
+                       xfer->data_left -= 4;
+               }
+               count--;
+       }
+}
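
To make the recovery above concrete: if a response reveals that the current xfer expected only 4 bytes but 12 were already drained from the FIFO on its behalf, the two surplus words belong to the next queued xfer. hci_pio_push_to_next_rx() then memmove()s whatever that next xfer has already received toward the end of its buffer, copies the surplus words in at the front, and recurses if the surplus spills over into yet another xfer, preserving the partial-word bookkeeping at each step.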
+
+static void hci_pio_err(struct i3c_hci *hci, struct hci_pio_data *pio,
+                       u32 status);
+
+static bool hci_pio_process_resp(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       while (pio->curr_resp &&
+              (pio_reg_read(INTR_STATUS) & STAT_RESP_READY)) {
+               struct hci_xfer *xfer = pio->curr_resp;
+               u32 resp = pio_reg_read(RESPONSE_QUEUE_PORT);
+               unsigned int tid = RESP_TID(resp);
+
+               DBG("resp = 0x%08x", resp);
+               if (tid != xfer->cmd_tid) {
+                       dev_err(&hci->master.dev,
+                               "response tid=%d when expecting %d\n",
+                               tid, xfer->cmd_tid);
+                       /* let's pretend it is a prog error... any of them */
+                       hci_pio_err(hci, pio, STAT_PROG_ERRORS);
+                       return false;
+               }
+               xfer->response = resp;
+
+               if (pio->curr_rx == xfer) {
+                       /*
+                        * Response availability implies RX completion.
+                        * Retrieve trailing RX data if any.
+                        * Note that short reads are possible.
+                        */
+                       unsigned int received, expected, to_keep;
+
+                       received = xfer->data_len - xfer->data_left;
+                       expected = RESP_DATA_LENGTH(xfer->response);
+                       if (expected > received) {
+                               hci_pio_do_trailing_rx(hci, pio,
+                                                      expected - received);
+                       } else if (received > expected) {
+                               /* we consumed data meant for next xfer */
+                               to_keep = DIV_ROUND_UP(expected, 4);
+                               hci_pio_push_to_next_rx(hci, xfer, to_keep);
+                       }
+
+                       /* then process the RX list pointer */
+                       if (hci_pio_process_rx(hci, pio))
+                               pio->enabled_irqs &= ~STAT_RX_THLD;
+               }
+
+               /*
+                * We're about to give back ownership of the xfer structure
+                * to the waiting instance. Make sure no reference to it
+                * still exists.
+                */
+               if (pio->curr_rx == xfer) {
+                       DBG("short RX ?");
+                       pio->curr_rx = pio->curr_rx->next_data;
+               } else if (pio->curr_tx == xfer) {
+                       DBG("short TX ?");
+                       pio->curr_tx = pio->curr_tx->next_data;
+               } else if (xfer->data_left) {
+                       DBG("PIO xfer count = %d after response",
+                           xfer->data_left);
+               }
+
+               pio->curr_resp = xfer->next_resp;
+               if (xfer->completion)
+                       complete(xfer->completion);
+       }
+       return !pio->curr_resp;
+}
+
+static void hci_pio_queue_resp(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       struct hci_xfer *xfer = pio->curr_xfer;
+       struct hci_xfer *prev_queue_tail;
+
+       if (!(xfer->cmd_desc[0] & CMD_0_ROC))
+               return;
+
+       prev_queue_tail = pio->resp_queue;
+       pio->resp_queue = xfer;
+       if (pio->curr_resp) {
+               prev_queue_tail->next_resp = xfer;
+       } else {
+               pio->curr_resp = xfer;
+               if (!hci_pio_process_resp(hci, pio))
+                       pio->enabled_irqs |= STAT_RESP_READY;
+       }
+}
+
+static bool hci_pio_process_cmd(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       while (pio->curr_xfer &&
+              (pio_reg_read(INTR_STATUS) & STAT_CMD_QUEUE_READY)) {
+               /*
+                * Always process the data FIFO before sending the command
+                * so needed TX data or RX space is available upfront.
+                */
+               hci_pio_queue_data(hci, pio);
+               /*
+                * Then queue our response request. This will also process
+                * the response FIFO in case it got suddenly filled up
+                * with results from previous commands.
+                */
+               hci_pio_queue_resp(hci, pio);
+               /*
+                * Finally send the command.
+                */
+               hci_pio_write_cmd(hci, pio->curr_xfer);
+               /*
+                * And move on.
+                */
+               pio->curr_xfer = pio->curr_xfer->next_xfer;
+       }
+       return !pio->curr_xfer;
+}
+
+static int hci_pio_queue_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n)
+{
+       struct hci_pio_data *pio = hci->io_data;
+       struct hci_xfer *prev_queue_tail;
+       int i;
+
+       DBG("n = %d", n);
+
+       /* link xfer instances together and initialize data count */
+       for (i = 0; i < n; i++) {
+               xfer[i].next_xfer = (i + 1 < n) ? &xfer[i + 1] : NULL;
+               xfer[i].next_data = NULL;
+               xfer[i].next_resp = NULL;
+               xfer[i].data_left = xfer[i].data_len;
+       }
+
+       spin_lock_irq(&pio->lock);
+       prev_queue_tail = pio->xfer_queue;
+       pio->xfer_queue = &xfer[n - 1];
+       if (pio->curr_xfer) {
+               prev_queue_tail->next_xfer = xfer;
+       } else {
+               pio->curr_xfer = xfer;
+               if (!hci_pio_process_cmd(hci, pio))
+                       pio->enabled_irqs |= STAT_CMD_QUEUE_READY;
+               pio_reg_write(INTR_SIGNAL_ENABLE, pio->enabled_irqs);
+               DBG("status = %#x/%#x",
+                   pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE));
+       }
+       spin_unlock_irq(&pio->lock);
+       return 0;
+}
+
+static bool hci_pio_dequeue_xfer_common(struct i3c_hci *hci,
+                                       struct hci_pio_data *pio,
+                                       struct hci_xfer *xfer, int n)
+{
+       struct hci_xfer *p, **p_prev_next;
+       int i;
+
+       /*
+        * To safely dequeue a transfer request, it must be either entirely
+        * processed, or not yet processed at all. If our request tail is
+        * reachable from either the data or resp list that means the command
+        * was submitted and not yet completed.
+        */
+       for (p = pio->curr_resp; p; p = p->next_resp)
+               for (i = 0; i < n; i++)
+                       if (p == &xfer[i])
+                               goto pio_screwed;
+       for (p = pio->curr_rx; p; p = p->next_data)
+               for (i = 0; i < n; i++)
+                       if (p == &xfer[i])
+                               goto pio_screwed;
+       for (p = pio->curr_tx; p; p = p->next_data)
+               for (i = 0; i < n; i++)
+                       if (p == &xfer[i])
+                               goto pio_screwed;
+
+       /*
+        * The command was completed, or wasn't yet submitted.
+        * Unlink it from the queue if the latter.
+        */
+       p_prev_next = &pio->curr_xfer;
+       for (p = pio->curr_xfer; p; p = p->next_xfer) {
+               if (p == &xfer[0]) {
+                       *p_prev_next = xfer[n - 1].next_xfer;
+                       break;
+               }
+               p_prev_next = &p->next_xfer;
+       }
+
+       /* return true if we actually unqueued something */
+       return !!p;
+
+pio_screwed:
+       /*
+        * Life is tough. We must invalidate the hardware state and
+        * discard everything that is still queued.
+        */
+       for (p = pio->curr_resp; p; p = p->next_resp) {
+               p->response = FIELD_PREP(RESP_ERR_FIELD, RESP_ERR_HC_TERMINATED);
+               if (p->completion)
+                       complete(p->completion);
+       }
+       for (p = pio->curr_xfer; p; p = p->next_xfer) {
+               p->response = FIELD_PREP(RESP_ERR_FIELD, RESP_ERR_HC_TERMINATED);
+               if (p->completion)
+                       complete(p->completion);
+       }
+       pio->curr_xfer = pio->curr_rx = pio->curr_tx = pio->curr_resp = NULL;
+
+       return true;
+}
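
The unlink loop above is the classic pointer-to-pointer traversal; isolated, with illustrative names:

#include <stdbool.h>

struct node { struct node *next; };

/* Sketch of the idiom: carrying the address of the previous 'next'
 * field lets one assignment unlink either the head or an interior
 * node, with no special case. */
static bool unlink_node(struct node **head, struct node *victim)
{
	struct node **pp = head;
	struct node *p;

	for (p = *head; p; p = p->next) {
		if (p == victim) {
			*pp = p->next;
			return true;
		}
		pp = &p->next;
	}
	return false;	/* not queued (already completed) */
}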
+
+static bool hci_pio_dequeue_xfer(struct i3c_hci *hci, struct hci_xfer *xfer, int n)
+{
+       struct hci_pio_data *pio = hci->io_data;
+       int ret;
+
+       spin_lock_irq(&pio->lock);
+       DBG("n=%d status=%#x/%#x", n,
+           pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE));
+       DBG("main_status = %#x/%#x",
+           readl(hci->base_regs + 0x20), readl(hci->base_regs + 0x28));
+
+       ret = hci_pio_dequeue_xfer_common(hci, pio, xfer, n);
+       spin_unlock_irq(&pio->lock);
+       return ret;
+}
+
+static void hci_pio_err(struct i3c_hci *hci, struct hci_pio_data *pio,
+                       u32 status)
+{
+       /* TODO: this ought to be more sophisticated eventually */
+
+       if (pio_reg_read(INTR_STATUS) & STAT_RESP_READY) {
+               /* this may happen when an error is signaled with ROC unset */
+               u32 resp = pio_reg_read(RESPONSE_QUEUE_PORT);
+
+               dev_err(&hci->master.dev,
+                       "orphan response (%#x) on error\n", resp);
+       }
+
+       /* dump states on programming errors */
+       if (status & STAT_PROG_ERRORS) {
+               u32 queue = pio_reg_read(QUEUE_CUR_STATUS);
+               u32 data = pio_reg_read(DATA_BUFFER_CUR_STATUS);
+
+               dev_err(&hci->master.dev,
+                       "prog error %#lx (C/R/I = %ld/%ld/%ld, TX/RX = %ld/%ld)\n",
+                       status & STAT_PROG_ERRORS,
+                       FIELD_GET(CUR_CMD_Q_EMPTY_LEVEL, queue),
+                       FIELD_GET(CUR_RESP_Q_LEVEL, queue),
+                       FIELD_GET(CUR_IBI_Q_LEVEL, queue),
+                       FIELD_GET(CUR_TX_BUF_LVL, data),
+                       FIELD_GET(CUR_RX_BUF_LVL, data));
+       }
+
+       /* just bust out everything with pending responses for now */
+       hci_pio_dequeue_xfer_common(hci, pio, pio->curr_resp, 1);
+       /* ... and half-way TX transfers if any */
+       if (pio->curr_tx && pio->curr_tx->data_left != pio->curr_tx->data_len)
+               hci_pio_dequeue_xfer_common(hci, pio, pio->curr_tx, 1);
+       /* then reset the hardware */
+       mipi_i3c_hci_pio_reset(hci);
+       mipi_i3c_hci_resume(hci);
+
+       DBG("status=%#x/%#x",
+           pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE));
+}
+
+static void hci_pio_set_ibi_thresh(struct i3c_hci *hci,
+                                  struct hci_pio_data *pio,
+                                  unsigned int thresh_val)
+{
+       u32 regval = pio->reg_queue_thresh;
+
+       regval &= ~QUEUE_IBI_STATUS_THLD;
+       regval |= FIELD_PREP(QUEUE_IBI_STATUS_THLD, thresh_val);
+       /* write the threshold reg only if it changes */
+       if (regval != pio->reg_queue_thresh) {
+               pio_reg_write(QUEUE_THLD_CTRL, regval);
+               pio->reg_queue_thresh = regval;
+               DBG("%d", thresh_val);
+       }
+}
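
This is shadow-register caching: the IBI threshold changes with nearly every segment, so the last value written is remembered and the MMIO write is skipped when nothing changed. A generic sketch of the pattern, where write_reg() and the field layout are illustrative assumptions:

struct dev_state {
	u32 shadow;	/* last value actually written to hardware */
};

static void update_reg_field(struct dev_state *st, u32 mask, u32 field_val)
{
	/* field_val is pre-shifted into position, as FIELD_PREP() does */
	u32 regval = (st->shadow & ~mask) | field_val;

	if (regval != st->shadow) {
		write_reg(regval);	/* assumed MMIO accessor */
		st->shadow = regval;
	}
}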
+
+static bool hci_pio_get_ibi_segment(struct i3c_hci *hci,
+                                   struct hci_pio_data *pio)
+{
+       struct hci_pio_ibi_data *ibi = &pio->ibi;
+       unsigned int nr_words, thresh_val;
+       u32 *p;
+
+       p = ibi->data_ptr;
+       p += (ibi->seg_len - ibi->seg_cnt) / 4;
+
+       while ((nr_words = ibi->seg_cnt/4)) {
+               /* determine our IBI queue threshold value */
+               thresh_val = min(nr_words, pio->max_ibi_thresh);
+               hci_pio_set_ibi_thresh(hci, pio, thresh_val);
+               /* bail out if we don't have that amount of data ready */
+               if (!(pio_reg_read(INTR_STATUS) & STAT_IBI_STATUS_THLD))
+                       return false;
+               /* extract the data from the IBI port */
+               nr_words = thresh_val;
+               ibi->seg_cnt -= nr_words * 4;
+               DBG("now %d left %d", nr_words * 4, ibi->seg_cnt);
+               while (nr_words--)
+                       *p++ = pio_reg_read(IBI_PORT);
+       }
+
+       if (ibi->seg_cnt) {
+               /*
+                * There are trailing bytes in the last word.
+                * Fetch it and extract bytes in an endian independent way.
+                * Unlike the TX case, we must not write past the end of
+                * the destination buffer.
+                */
+               u32 data;
+               u8 *p_byte = (u8 *)p;
+
+               hci_pio_set_ibi_thresh(hci, pio, 1);
+               if (!(pio_reg_read(INTR_STATUS) & STAT_IBI_STATUS_THLD))
+                       return false;
+               DBG("trailing %d", ibi->seg_cnt);
+               data = pio_reg_read(IBI_PORT);
+               data = (__force u32) cpu_to_le32(data);
+               while (ibi->seg_cnt--) {
+                       *p_byte++ = data;
+                       data >>= 8;
+               }
+       }
+
+       return true;
+}
+
+static bool hci_pio_prep_new_ibi(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       struct hci_pio_ibi_data *ibi = &pio->ibi;
+       struct i3c_dev_desc *dev;
+       struct i3c_hci_dev_data *dev_data;
+       struct hci_pio_dev_ibi_data *dev_ibi;
+       u32 ibi_status;
+
+       /*
+        * We have a new IBI. Try to set up its payload retrieval.
+        * When returning true, the IBI data has to be consumed whether
+        * or not we are set up to capture it. If we return true with
+        * ibi->slot == NULL that means the data payload has to be
+        * drained out of the IBI port and dropped.
+        */
+
+       ibi_status = pio_reg_read(IBI_PORT);
+       DBG("status = %#x", ibi_status);
+       ibi->addr = FIELD_GET(IBI_TARGET_ADDR, ibi_status);
+       if (ibi_status & IBI_ERROR) {
+               dev_err(&hci->master.dev, "IBI error from %#x\n", ibi->addr);
+               return false;
+       }
+
+       ibi->last_seg = ibi_status & IBI_LAST_STATUS;
+       ibi->seg_len = FIELD_GET(IBI_DATA_LENGTH, ibi_status);
+       ibi->seg_cnt = ibi->seg_len;
+
+       dev = i3c_hci_addr_to_dev(hci, ibi->addr);
+       if (!dev) {
+               dev_err(&hci->master.dev,
+                       "IBI for unknown device %#x\n", ibi->addr);
+               return true;
+       }
+
+       dev_data = i3c_dev_get_master_data(dev);
+       dev_ibi = dev_data->ibi_data;
+       ibi->max_len = dev_ibi->max_len;
+
+       if (ibi->seg_len > ibi->max_len) {
+               dev_err(&hci->master.dev, "IBI payload too big (%d > %d)\n",
+                       ibi->seg_len, ibi->max_len);
+               return true;
+       }
+
+       ibi->slot = i3c_generic_ibi_get_free_slot(dev_ibi->pool);
+       if (!ibi->slot) {
+               dev_err(&hci->master.dev, "no free slot for IBI\n");
+       } else {
+               ibi->slot->len = 0;
+               ibi->data_ptr = ibi->slot->data;
+       }
+       return true;
+}
+
+static void hci_pio_free_ibi_slot(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       struct hci_pio_ibi_data *ibi = &pio->ibi;
+       struct hci_pio_dev_ibi_data *dev_ibi;
+
+       if (ibi->slot) {
+               dev_ibi = ibi->slot->dev->common.master_priv;
+               i3c_generic_ibi_recycle_slot(dev_ibi->pool, ibi->slot);
+               ibi->slot = NULL;
+       }
+}
+
+static bool hci_pio_process_ibi(struct i3c_hci *hci, struct hci_pio_data *pio)
+{
+       struct hci_pio_ibi_data *ibi = &pio->ibi;
+
+       if (!ibi->slot && !ibi->seg_cnt && ibi->last_seg)
+               if (!hci_pio_prep_new_ibi(hci, pio))
+                       return false;
+
+       for (;;) {
+               u32 ibi_status;
+               unsigned int ibi_addr;
+
+               if (ibi->slot) {
+                       if (!hci_pio_get_ibi_segment(hci, pio))
+                               return false;
+                       ibi->slot->len += ibi->seg_len;
+                       ibi->data_ptr += ibi->seg_len;
+                       if (ibi->last_seg) {
+                               /* was the last segment: submit it and leave */
+                               i3c_master_queue_ibi(ibi->slot->dev, ibi->slot);
+                               ibi->slot = NULL;
+                               hci_pio_set_ibi_thresh(hci, pio, 1);
+                               return true;
+                       }
+               } else if (ibi->seg_cnt) {
+                       /*
+                        * No slot but a non-zero count. This is the result
+                        * of some error and the payload must be drained.
+                        * This normally does not happen, therefore no need
+                        * to be extra optimized here.
+                        */
+                       hci_pio_set_ibi_thresh(hci, pio, 1);
+                       do {
+                               if (!(pio_reg_read(INTR_STATUS) & STAT_IBI_STATUS_THLD))
+                                       return false;
+                               pio_reg_read(IBI_PORT);
+                       } while (--ibi->seg_cnt);
+                       if (ibi->last_seg)
+                               return true;
+               }
+
+               /* try to move to the next segment right away */
+               hci_pio_set_ibi_thresh(hci, pio, 1);
+               if (!(pio_reg_read(INTR_STATUS) & STAT_IBI_STATUS_THLD))
+                       return false;
+               ibi_status = pio_reg_read(IBI_PORT);
+               ibi_addr = FIELD_GET(IBI_TARGET_ADDR, ibi_status);
+               if (ibi->addr != ibi_addr) {
+                       /* target address changed before last segment */
+                       dev_err(&hci->master.dev,
+                               "unexp IBI address changed from %d to %d\n",
+                               ibi->addr, ibi_addr);
+                       hci_pio_free_ibi_slot(hci, pio);
+               }
+               ibi->last_seg = ibi_status & IBI_LAST_STATUS;
+               ibi->seg_len = FIELD_GET(IBI_DATA_LENGTH, ibi_status);
+               ibi->seg_cnt = ibi->seg_len;
+               if (ibi->slot && ibi->slot->len + ibi->seg_len > ibi->max_len) {
+                       dev_err(&hci->master.dev,
+                               "IBI payload too big (%d > %d)\n",
+                               ibi->slot->len + ibi->seg_len, ibi->max_len);
+                       hci_pio_free_ibi_slot(hci, pio);
+               }
+       }
+
+       return false;
+}
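
Seen as a whole, the loop above alternates between two states per IBI: consume a status word from the IBI port (target address, segment length, last-segment flag), then drain that segment's payload words, repeating until IBI_LAST_STATUS is seen or the FIFO runs dry, in which case it returns false and resumes from the same state on the next threshold interrupt.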
+
+static int hci_pio_request_ibi(struct i3c_hci *hci, struct i3c_dev_desc *dev,
+                              const struct i3c_ibi_setup *req)
+{
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+       struct i3c_generic_ibi_pool *pool;
+       struct hci_pio_dev_ibi_data *dev_ibi;
+
+       dev_ibi = kmalloc(sizeof(*dev_ibi), GFP_KERNEL);
+       if (!dev_ibi)
+               return -ENOMEM;
+       pool = i3c_generic_ibi_alloc_pool(dev, req);
+       if (IS_ERR(pool)) {
+               kfree(dev_ibi);
+               return PTR_ERR(pool);
+       }
+       dev_ibi->pool = pool;
+       dev_ibi->max_len = req->max_payload_len;
+       dev_data->ibi_data = dev_ibi;
+       return 0;
+}
+
+static void hci_pio_free_ibi(struct i3c_hci *hci, struct i3c_dev_desc *dev)
+{
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+       struct hci_pio_dev_ibi_data *dev_ibi = dev_data->ibi_data;
+
+       dev_data->ibi_data = NULL;
+       i3c_generic_ibi_free_pool(dev_ibi->pool);
+       kfree(dev_ibi);
+}
+
+static void hci_pio_recycle_ibi_slot(struct i3c_hci *hci,
+                                   struct i3c_dev_desc *dev,
+                                   struct i3c_ibi_slot *slot)
+{
+       struct i3c_hci_dev_data *dev_data = i3c_dev_get_master_data(dev);
+       struct hci_pio_dev_ibi_data *dev_ibi = dev_data->ibi_data;
+
+       i3c_generic_ibi_recycle_slot(dev_ibi->pool, slot);
+}
+
+static bool hci_pio_irq_handler(struct i3c_hci *hci, unsigned int unused)
+{
+       struct hci_pio_data *pio = hci->io_data;
+       u32 status;
+
+       spin_lock(&pio->lock);
+       status = pio_reg_read(INTR_STATUS);
+       DBG("(in) status: %#x/%#x", status, pio->enabled_irqs);
+       status &= pio->enabled_irqs | STAT_LATENCY_WARNINGS;
+       if (!status) {
+               spin_unlock(&pio->lock);
+               return false;
+       }
+
+       if (status & STAT_IBI_STATUS_THLD)
+               hci_pio_process_ibi(hci, pio);
+
+       if (status & STAT_RX_THLD)
+               if (hci_pio_process_rx(hci, pio))
+                       pio->enabled_irqs &= ~STAT_RX_THLD;
+       if (status & STAT_TX_THLD)
+               if (hci_pio_process_tx(hci, pio))
+                       pio->enabled_irqs &= ~STAT_TX_THLD;
+       if (status & STAT_RESP_READY)
+               if (hci_pio_process_resp(hci, pio))
+                       pio->enabled_irqs &= ~STAT_RESP_READY;
+
+       if (unlikely(status & STAT_LATENCY_WARNINGS)) {
+               pio_reg_write(INTR_STATUS, status & STAT_LATENCY_WARNINGS);
+               dev_warn_ratelimited(&hci->master.dev,
+                                    "encountered warning condition %#lx\n",
+                                    status & STAT_LATENCY_WARNINGS);
+       }
+
+       if (unlikely(status & STAT_ALL_ERRORS)) {
+               pio_reg_write(INTR_STATUS, status & STAT_ALL_ERRORS);
+               hci_pio_err(hci, pio, status & STAT_ALL_ERRORS);
+       }
+
+       if (status & STAT_CMD_QUEUE_READY)
+               if (hci_pio_process_cmd(hci, pio))
+                       pio->enabled_irqs &= ~STAT_CMD_QUEUE_READY;
+
+       pio_reg_write(INTR_SIGNAL_ENABLE, pio->enabled_irqs);
+       DBG("(out) status: %#x/%#x",
+           pio_reg_read(INTR_STATUS), pio_reg_read(INTR_SIGNAL_ENABLE));
+       spin_unlock(&pio->lock);
+       return true;
+}
+
+const struct hci_io_ops mipi_i3c_hci_pio = {
+       .init                   = hci_pio_init,
+       .cleanup                = hci_pio_cleanup,
+       .queue_xfer             = hci_pio_queue_xfer,
+       .dequeue_xfer           = hci_pio_dequeue_xfer,
+       .irq_handler            = hci_pio_irq_handler,
+       .request_ibi            = hci_pio_request_ibi,
+       .free_ibi               = hci_pio_free_ibi,
+       .recycle_ibi_slot       = hci_pio_recycle_ibi_slot,
+};
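
The ops table is what keeps the core transfer-engine agnostic. A hypothetical call-site sketch; the 'io' pointer name and the unused second argument are assumptions, not quoted from the core:

/* Hypothetical dispatch sketch: the core picks &mipi_i3c_hci_pio or a
 * DMA equivalent at probe time and only ever calls through the ops. */
static irqreturn_t i3c_hci_irq_sketch(int irq, void *ptr)
{
	struct i3c_hci *hci = ptr;

	return hci->io->irq_handler(hci, 0) ? IRQ_HANDLED : IRQ_NONE;
}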
diff --git a/drivers/i3c/master/mipi-i3c-hci/xfer_mode_rate.h b/drivers/i3c/master/mipi-i3c-hci/xfer_mode_rate.h
new file mode 100644 (file)
index 0000000..1e36b75
--- /dev/null
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/*
+ * Copyright (c) 2020, MIPI Alliance, Inc.
+ *
+ * Author: Nicolas Pitre <npitre@baylibre.com>
+ *
+ * Transfer Mode/Rate Table definitions as found in extended capability
+ * sections 0x04 and 0x08.
+ * This applies starting from I3C HCI v2.0.
+ */
+
+#ifndef XFER_MODE_RATE_H
+#define XFER_MODE_RATE_H
+
+/*
+ * Master Transfer Mode Table Fixed Indexes.
+ *
+ * Indexes 0x0 and 0x8 are mandatory. Availability for the rest must be
+ * obtained from the mode table in the extended capability area.
+ * Presence and definitions for indexes beyond these may vary.
+ */
+#define XFERMODE_IDX_I3C_SDR           0x00    /* I3C SDR Mode */
+#define XFERMODE_IDX_I3C_HDR_DDR       0x01    /* I3C HDR-DDR Mode */
+#define XFERMODE_IDX_I3C_HDR_T         0x02    /* I3C HDR-Ternary Mode */
+#define XFERMODE_IDX_I3C_HDR_BT                0x03    /* I3C HDR-BT Mode */
+#define XFERMODE_IDX_I2C               0x08    /* Legacy I2C Mode */
+
+/*
+ * Transfer Mode Table Entry Bits Definitions
+ */
+#define XFERMODE_VALID_XFER_ADD_FUNC   GENMASK(21, 16)
+#define XFERMODE_ML_DATA_XFER_CODING   GENMASK(15, 11)
+#define XFERMODE_ML_ADDL_LANES         GENMASK(10, 8)
+#define XFERMODE_SUPPORTED             BIT(7)
+#define XFERMODE_MODE                  GENMASK(3, 0)
+
+/*
+ * Master Data Transfer Rate Selector Values.
+ *
+ * These are the values to be used in the command descriptor XFER_RATE field
+ * and found in the RATE_ID field below.
+ * The I3C_SDR0, I3C_SDR1, I3C_SDR2, I3C_SDR3, I3C_SDR4 and I2C_FM rates
+ * are required, everything else is optional and discoverable in the
+ * Data Transfer Rate Table. Indicated are typical rates. The actual
+ * rates may vary slightly and are also specified in the Data Transfer
+ * Rate Table.
+ */
+#define XFERRATE_I3C_SDR0              0x00    /* 12.5 MHz */
+#define XFERRATE_I3C_SDR1              0x01    /* 8 MHz */
+#define XFERRATE_I3C_SDR2              0x02    /* 6 MHz */
+#define XFERRATE_I3C_SDR3              0x03    /* 4 MHz */
+#define XFERRATE_I3C_SDR4              0x04    /* 2 MHz */
+#define XFERRATE_I3C_SDR_FM_FMP                0x05    /* 400 kHz / 1 MHz */
+#define XFERRATE_I3C_SDR_USER6         0x06    /* User Defined */
+#define XFERRATE_I3C_SDR_USER7         0x07    /* User Defined */
+
+#define XFERRATE_I2C_FM                        0x00    /* 400 kHz */
+#define XFERRATE_I2C_FMP               0x01    /* 1 MHz */
+#define XFERRATE_I2C_USER2             0x02    /* User Defined */
+#define XFERRATE_I2C_USER3             0x03    /* User Defined */
+#define XFERRATE_I2C_USER4             0x04    /* User Defined */
+#define XFERRATE_I2C_USER5             0x05    /* User Defined */
+#define XFERRATE_I2C_USER6             0x06    /* User Defined */
+#define XFERRATE_I2C_USER7             0x07    /* User Defined */
+
+/*
+ * Master Data Transfer Rate Table Mode ID values.
+ */
+#define XFERRATE_MODE_I3C              0x00
+#define XFERRATE_MODE_I2C              0x08
+
+/*
+ * Master Data Transfer Rate Table Entry Bits Definitions
+ */
+#define XFERRATE_MODE_ID               GENMASK(31, 28)
+#define XFERRATE_RATE_ID               GENMASK(22, 20)
+#define XFERRATE_ACTUAL_RATE_KHZ       GENMASK(19, 0)
+
+#endif
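
As a hedged illustration of how these masks are meant to compose with <linux/bitfield.h>; the helper and the entry's origin are hypothetical:

/* Hypothetical decode of one Data Transfer Rate Table entry; 'entry'
 * would be a raw word read from extended capability section 0x08. */
static void decode_rate_entry(u32 entry)
{
	u32 mode = FIELD_GET(XFERRATE_MODE_ID, entry);
	u32 rate_id = FIELD_GET(XFERRATE_RATE_ID, entry);
	u32 rate_khz = FIELD_GET(XFERRATE_ACTUAL_RATE_KHZ, entry);

	pr_info("mode %u, rate id %u: %u kHz\n", mode, rate_id, rate_khz);
}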
index c951ad2..ed46e60 100644 (file)
@@ -3844,8 +3844,6 @@ static void its_vpe_schedule(struct its_vpe *vpe)
        val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0;
        val |= GICR_VPENDBASER_Valid;
        gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
-
-       its_wait_vpt_parse_complete();
 }
 
 static void its_vpe_deschedule(struct its_vpe *vpe)
@@ -3893,6 +3891,10 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
                its_vpe_deschedule(vpe);
                return 0;
 
+       case COMMIT_VPE:
+               its_wait_vpt_parse_complete();
+               return 0;
+
        case INVALL_VPE:
                its_vpe_invall(vpe);
                return 0;
@@ -4054,8 +4056,6 @@ static void its_vpe_4_1_schedule(struct its_vpe *vpe,
        val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id);
 
        gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER);
-
-       its_wait_vpt_parse_complete();
 }
 
 static void its_vpe_4_1_deschedule(struct its_vpe *vpe,
@@ -4130,6 +4130,10 @@ static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
                its_vpe_4_1_deschedule(vpe, info);
                return 0;
 
+       case COMMIT_VPE:
+               its_wait_vpt_parse_complete();
+               return 0;
+
        case INVALL_VPE:
                its_vpe_4_1_invall(vpe);
                return 0;
index 0c18714..5d1dc99 100644 (file)
@@ -232,6 +232,8 @@ int its_make_vpe_non_resident(struct its_vpe *vpe, bool db)
        if (!ret)
                vpe->resident = false;
 
+       vpe->ready = false;
+
        return ret;
 }
 
@@ -258,6 +260,23 @@ int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en)
        return ret;
 }
 
+int its_commit_vpe(struct its_vpe *vpe)
+{
+       struct its_cmd_info info = {
+               .cmd_type = COMMIT_VPE,
+       };
+       int ret;
+
+       WARN_ON(preemptible());
+
+       ret = its_send_vpe_cmd(vpe, &info);
+       if (!ret)
+               vpe->ready = true;
+
+       return ret;
+}
+
 int its_invall_vpe(struct its_vpe *vpe)
 {
        struct its_cmd_info info = {
index 30ba357..b7e2d96 100644 (file)
@@ -463,6 +463,15 @@ config DM_MULTIPATH_HST
 
          If unsure, say N.
 
+config DM_MULTIPATH_IOA
+       tristate "I/O Path Selector based on CPU submission"
+       depends on DM_MULTIPATH
+       help
+         This path selector selects the path based on the CPU the I/O is
+         submitted on, using the CPU-to-path mapping set up at path
+         addition time.
+
+         If unsure, say N.
+
 config DM_DELAY
        tristate "I/O delaying target"
        depends on BLK_DEV_DM
@@ -530,11 +539,22 @@ config DM_VERITY_VERIFY_ROOTHASH_SIG
        bool "Verity data device root hash signature verification support"
        depends on DM_VERITY
        select SYSTEM_DATA_VERIFICATION
-         help
+       help
          Add ability for dm-verity device to be validated if the
          pre-generated tree of cryptographic checksums passed has a pkcs#7
          signature file that can validate the roothash of the tree.
 
+         By default, rely on the builtin trusted keyring.
+
+         If unsure, say N.
+
+config DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING
+       bool "Verity data device root hash signature verification with secondary keyring"
+       depends on DM_VERITY_VERIFY_ROOTHASH_SIG
+       depends on SECONDARY_TRUSTED_KEYRING
+       help
+         Rely on the secondary trusted keyring to verify dm-verity signatures.
+
          If unsure, say N.
 
 config DM_VERITY_FEC
index 6d3e234..ef7ddc2 100644 (file)
@@ -7,23 +7,28 @@ dm-mod-y      += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
                   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \
                   dm-rq.o
 dm-multipath-y += dm-path-selector.o dm-mpath.o
+dm-historical-service-time-y += dm-ps-historical-service-time.o
+dm-io-affinity-y += dm-ps-io-affinity.o
+dm-queue-length-y += dm-ps-queue-length.o
+dm-round-robin-y += dm-ps-round-robin.o
+dm-service-time-y += dm-ps-service-time.o
 dm-snapshot-y  += dm-snap.o dm-exception-store.o dm-snap-transient.o \
                    dm-snap-persistent.o
 dm-mirror-y    += dm-raid1.o
-dm-log-userspace-y \
-               += dm-log-userspace-base.o dm-log-userspace-transfer.o
+dm-log-userspace-y += dm-log-userspace-base.o dm-log-userspace-transfer.o
 dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o
 dm-thin-pool-y += dm-thin.o dm-thin-metadata.o
 dm-cache-y     += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \
                    dm-cache-background-tracker.o
-dm-cache-smq-y   += dm-cache-policy-smq.o
+dm-cache-smq-y += dm-cache-policy-smq.o
 dm-ebs-y       += dm-ebs-target.o
 dm-era-y       += dm-era-target.o
 dm-clone-y     += dm-clone-target.o dm-clone-metadata.o
 dm-verity-y    += dm-verity-target.o
+dm-zoned-y     += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
+
 md-mod-y       += md.o md-bitmap.o
 raid456-y      += raid5.o raid5-cache.o raid5-ppl.o
-dm-zoned-y     += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
 linear-y       += md-linear.o
 multipath-y    += md-multipath.o
 faulty-y       += md-faulty.o
@@ -59,14 +64,15 @@ obj-$(CONFIG_DM_MULTIPATH)  += dm-multipath.o dm-round-robin.o
 obj-$(CONFIG_DM_MULTIPATH_QL)  += dm-queue-length.o
 obj-$(CONFIG_DM_MULTIPATH_ST)  += dm-service-time.o
 obj-$(CONFIG_DM_MULTIPATH_HST) += dm-historical-service-time.o
+obj-$(CONFIG_DM_MULTIPATH_IOA) += dm-io-affinity.o
 obj-$(CONFIG_DM_SWITCH)                += dm-switch.o
 obj-$(CONFIG_DM_SNAPSHOT)      += dm-snapshot.o
-obj-$(CONFIG_DM_PERSISTENT_DATA)       += persistent-data/
+obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/
 obj-$(CONFIG_DM_MIRROR)                += dm-mirror.o dm-log.o dm-region-hash.o
 obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
 obj-$(CONFIG_DM_ZERO)          += dm-zero.o
-obj-$(CONFIG_DM_RAID)  += dm-raid.o
-obj-$(CONFIG_DM_THIN_PROVISIONING)     += dm-thin-pool.o
+obj-$(CONFIG_DM_RAID)          += dm-raid.o
+obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
 obj-$(CONFIG_DM_VERITY)                += dm-verity.o
 obj-$(CONFIG_DM_CACHE)         += dm-cache.o
 obj-$(CONFIG_DM_CACHE_SMQ)     += dm-cache-smq.o
index 4bc453f..541c450 100644 (file)
@@ -2840,7 +2840,6 @@ static void cache_postsuspend(struct dm_target *ti)
 static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
                        bool dirty, uint32_t hint, bool hint_valid)
 {
-       int r;
        struct cache *cache = context;
 
        if (dirty) {
@@ -2849,11 +2848,7 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
        } else
                clear_bit(from_cblock(cblock), cache->dirty_bitset);
 
-       r = policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
-       if (r)
-               return r;
-
-       return 0;
+       return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
 }
 
 /*
index 392337f..5f9f9b3 100644 (file)
@@ -1090,16 +1090,16 @@ static const struct crypt_iv_operations crypt_iv_tcw_ops = {
        .post      = crypt_iv_tcw_post
 };
 
-static struct crypt_iv_operations crypt_iv_random_ops = {
+static const struct crypt_iv_operations crypt_iv_random_ops = {
        .generator = crypt_iv_random_gen
 };
 
-static struct crypt_iv_operations crypt_iv_eboiv_ops = {
+static const struct crypt_iv_operations crypt_iv_eboiv_ops = {
        .ctr       = crypt_iv_eboiv_ctr,
        .generator = crypt_iv_eboiv_gen
 };
 
-static struct crypt_iv_operations crypt_iv_elephant_ops = {
+static const struct crypt_iv_operations crypt_iv_elephant_ops = {
        .ctr       = crypt_iv_elephant_ctr,
        .dtr       = crypt_iv_elephant_dtr,
        .init      = crypt_iv_elephant_init,
@@ -3166,11 +3166,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        }
 
        if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
-               cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+               cc->crypt_queue = alloc_workqueue("kcryptd-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
                                                  1, devname);
        else
-               cc->crypt_queue = alloc_workqueue("kcryptd/%s",
-                                                 WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+               cc->crypt_queue = alloc_workqueue("kcryptd-%s",
+                                                 WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
+                                                 WQ_UNBOUND | WQ_SYSFS,
                                                  num_online_cpus(), devname);
        if (!cc->crypt_queue) {
                ti->error = "Couldn't create kcryptd queue";
index cb85610..55bcfb7 100644 (file)
@@ -86,7 +86,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv
                else
                        ba = dm_bufio_new(ec->bufio, block, &b);
 
-               if (unlikely(IS_ERR(ba))) {
+               if (IS_ERR(ba)) {
                        /*
                         * Carry on with next buffer, if any, to issue all possible
                         * data but return error.
index cd0478d..5e306bb 100644 (file)
@@ -1600,6 +1600,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para
 
        if (!argc) {
                DMWARN("Empty message received.");
+               r = -EINVAL;
                goto out_argv;
        }
 
diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c
new file mode 100644 (file)
index 0000000..077655c
--- /dev/null
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Oracle Corporation
+ *
+ * Module Author: Mike Christie
+ */
+#include "dm-path-selector.h"
+
+#include <linux/device-mapper.h>
+#include <linux/module.h>
+
+#define DM_MSG_PREFIX "multipath io-affinity"
+
+struct path_info {
+       struct dm_path *path;
+       cpumask_var_t cpumask;
+       refcount_t refcount;
+       bool failed;
+};
+
+struct selector {
+       struct path_info **path_map;
+       cpumask_var_t path_mask;
+       atomic_t map_misses;
+};
+
+static void ioa_free_path(struct selector *s, unsigned int cpu)
+{
+       struct path_info *pi = s->path_map[cpu];
+
+       if (!pi)
+               return;
+
+       if (refcount_dec_and_test(&pi->refcount)) {
+               cpumask_clear_cpu(cpu, s->path_mask);
+               free_cpumask_var(pi->cpumask);
+               kfree(pi);
+
+               s->path_map[cpu] = NULL;
+       }
+}
+
+static int ioa_add_path(struct path_selector *ps, struct dm_path *path,
+                       int argc, char **argv, char **error)
+{
+       struct selector *s = ps->context;
+       struct path_info *pi = NULL;
+       unsigned int cpu;
+       int ret;
+
+       if (argc != 1) {
+               *error = "io-affinity ps: invalid number of arguments";
+               return -EINVAL;
+       }
+
+       pi = kzalloc(sizeof(*pi), GFP_KERNEL);
+       if (!pi) {
+               *error = "io-affinity ps: Error allocating path context";
+               return -ENOMEM;
+       }
+
+       pi->path = path;
+       path->pscontext = pi;
+       refcount_set(&pi->refcount, 1);
+
+       if (!zalloc_cpumask_var(&pi->cpumask, GFP_KERNEL)) {
+               *error = "io-affinity ps: Error allocating cpumask context";
+               ret = -ENOMEM;
+               goto free_pi;
+       }
+
+       ret = cpumask_parse(argv[0], pi->cpumask);
+       if (ret) {
+               *error = "io-affinity ps: invalid cpumask";
+               ret = -EINVAL;
+               goto free_mask;
+       }
+
+       for_each_cpu(cpu, pi->cpumask) {
+               if (cpu >= nr_cpu_ids) {
+                       DMWARN_LIMIT("Ignoring mapping for CPU %u. Max CPU is %u",
+                                    cpu, nr_cpu_ids);
+                       break;
+               }
+
+               if (s->path_map[cpu]) {
+                       DMWARN("CPU mapping for %u exists. Ignoring.", cpu);
+                       continue;
+               }
+
+               cpumask_set_cpu(cpu, s->path_mask);
+               s->path_map[cpu] = pi;
+               refcount_inc(&pi->refcount);
+               continue;
+       }
+
+       if (refcount_dec_and_test(&pi->refcount)) {
+               *error = "io-affinity ps: No new/valid CPU mapping found";
+               ret = -EINVAL;
+               goto free_mask;
+       }
+
+       return 0;
+
+free_mask:
+       free_cpumask_var(pi->cpumask);
+free_pi:
+       kfree(pi);
+       return ret;
+}
+
+static int ioa_create(struct path_selector *ps, unsigned argc, char **argv)
+{
+       struct selector *s;
+
+       s = kmalloc(sizeof(*s), GFP_KERNEL);
+       if (!s)
+               return -ENOMEM;
+
+       s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *),
+                             GFP_KERNEL);
+       if (!s->path_map)
+               goto free_selector;
+
+       if (!zalloc_cpumask_var(&s->path_mask, GFP_KERNEL))
+               goto free_map;
+
+       atomic_set(&s->map_misses, 0);
+       ps->context = s;
+       return 0;
+
+free_map:
+       kfree(s->path_map);
+free_selector:
+       kfree(s);
+       return -ENOMEM;
+}
+
+static void ioa_destroy(struct path_selector *ps)
+{
+       struct selector *s = ps->context;
+       unsigned cpu;
+
+       for_each_cpu(cpu, s->path_mask)
+               ioa_free_path(s, cpu);
+
+       free_cpumask_var(s->path_mask);
+       kfree(s->path_map);
+       kfree(s);
+
+       ps->context = NULL;
+}
+
+static int ioa_status(struct path_selector *ps, struct dm_path *path,
+                     status_type_t type, char *result, unsigned int maxlen)
+{
+       struct selector *s = ps->context;
+       struct path_info *pi;
+       int sz = 0;
+
+       if (!path) {
+               DMEMIT("0 ");
+               return sz;
+       }
+
+       switch(type) {
+       case STATUSTYPE_INFO:
+               DMEMIT("%d ", atomic_read(&s->map_misses));
+               break;
+       case STATUSTYPE_TABLE:
+               pi = path->pscontext;
+               DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask));
+               break;
+       }
+
+       return sz;
+}
+
+static void ioa_fail_path(struct path_selector *ps, struct dm_path *p)
+{
+       struct path_info *pi = p->pscontext;
+
+       pi->failed = true;
+}
+
+static int ioa_reinstate_path(struct path_selector *ps, struct dm_path *p)
+{
+       struct path_info *pi = p->pscontext;
+
+       pi->failed = false;
+       return 0;
+}
+
+static struct dm_path *ioa_select_path(struct path_selector *ps,
+                                      size_t nr_bytes)
+{
+       unsigned int cpu, node;
+       struct selector *s = ps->context;
+       const struct cpumask *cpumask;
+       struct path_info *pi;
+       int i;
+
+       cpu = get_cpu();
+
+       pi = s->path_map[cpu];
+       if (pi && !pi->failed)
+               goto done;
+
+       /*
+        * Perf is not optimal, but we at least try the local node, then just
+        * try not to fail.
+        */
+       if (!pi)
+               atomic_inc(&s->map_misses);
+
+       node = cpu_to_node(cpu);
+       cpumask = cpumask_of_node(node);
+       for_each_cpu(i, cpumask) {
+               pi = s->path_map[i];
+               if (pi && !pi->failed)
+                       goto done;
+       }
+
+       for_each_cpu(i, s->path_mask) {
+               pi = s->path_map[i];
+               if (pi && !pi->failed)
+                       goto done;
+       }
+       pi = NULL;
+
+done:
+       put_cpu();
+       return pi ? pi->path : NULL;
+}
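
The get_cpu()/put_cpu() pair is load-bearing here: it returns the current CPU id with preemption disabled, so the id cannot go stale mid-lookup, whereas a bare smp_processor_id() would be racy in preemptible context. Reduced to a hypothetical helper:

static struct path_info *lookup_local_path(struct selector *s)
{
	unsigned int cpu = get_cpu();	/* current CPU, preemption off */
	struct path_info *pi = s->path_map[cpu];

	put_cpu();			/* preemption back on */
	return pi;			/* may be NULL: caller falls back */
}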
+
+static struct path_selector_type ioa_ps = {
+       .name           = "io-affinity",
+       .module         = THIS_MODULE,
+       .table_args     = 1,
+       .info_args      = 1,
+       .create         = ioa_create,
+       .destroy        = ioa_destroy,
+       .status         = ioa_status,
+       .add_path       = ioa_add_path,
+       .fail_path      = ioa_fail_path,
+       .reinstate_path = ioa_reinstate_path,
+       .select_path    = ioa_select_path,
+};
+
+static int __init dm_ioa_init(void)
+{
+       int ret = dm_register_path_selector(&ioa_ps);
+
+       if (ret < 0)
+               DMERR("register failed %d", ret);
+       return ret;
+}
+
+static void __exit dm_ioa_exit(void)
+{
+       int ret = dm_unregister_path_selector(&ioa_ps);
+
+       if (ret < 0)
+               DMERR("unregister failed %d", ret);
+}
+
+module_init(dm_ioa_init);
+module_exit(dm_ioa_exit);
+
+MODULE_DESCRIPTION(DM_NAME " multipath path selector that selects paths based on the CPU the IO is being executed on");
+MODULE_AUTHOR("Mike Christie <michael.christie@oracle.com>");
+MODULE_LICENSE("GPL");
index 151d022..df359d3 100644 (file)
@@ -496,7 +496,7 @@ static void stripe_io_hints(struct dm_target *ti,
 static struct target_type stripe_target = {
        .name   = "striped",
        .version = {1, 6, 0},
-       .features = DM_TARGET_PASSES_INTEGRITY,
+       .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
        .module = THIS_MODULE,
        .ctr    = stripe_ctr,
        .dtr    = stripe_dtr,
index bff4c7f..262e2b0 100644 (file)
@@ -550,6 +550,7 @@ static int switch_iterate_devices(struct dm_target *ti,
 static struct target_type switch_target = {
        .name = "switch",
        .version = {1, 1, 0},
+       .features = DM_TARGET_NOWAIT,
        .module = THIS_MODULE,
        .ctr = switch_ctr,
        .dtr = switch_dtr,
index e673dac..7357c1b 100644 (file)
@@ -178,6 +178,7 @@ static void unstripe_io_hints(struct dm_target *ti,
 static struct target_type unstripe_target = {
        .name = "unstriped",
        .version = {1, 1, 0},
+       .features = DM_TARGET_NOWAIT,
        .module = THIS_MODULE,
        .ctr = unstripe_ctr,
        .dtr = unstripe_dtr,
index f74982d..6b8e5bd 100644 (file)
@@ -538,6 +538,15 @@ static int verity_verify_io(struct dm_verity_io *io)
 }
 
 /*
+ * Skip verity work in response to I/O error when system is shutting down.
+ */
+static inline bool verity_is_system_shutting_down(void)
+{
+       return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
+               || system_state == SYSTEM_RESTART;
+}
+
+/*
  * End one "io" structure with a given error.
  */
 static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
@@ -564,7 +573,8 @@ static void verity_end_io(struct bio *bio)
 {
        struct dm_verity_io *io = bio->bi_private;
 
-       if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
+       if (bio->bi_status &&
+           (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) {
                verity_finish_io(io, bio->bi_status);
                return;
        }
index 614e43d..29385dc 100644 (file)
@@ -119,8 +119,13 @@ int verity_verify_root_hash(const void *root_hash, size_t root_hash_len,
        }
 
        ret = verify_pkcs7_signature(root_hash, root_hash_len, sig_data,
-                               sig_len, NULL, VERIFYING_UNSPECIFIED_SIGNATURE,
-                               NULL, NULL);
+                               sig_len,
+#ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING
+                               VERIFY_USE_SECONDARY_KEYRING,
+#else
+                               NULL,
+#endif
+                               VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL);
 
        return ret;
 }
index b65ca8d..faa1dbf 100644 (file)
@@ -59,6 +59,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
 static struct target_type zero_target = {
        .name   = "zero",
        .version = {1, 1, 0},
+       .features = DM_TARGET_NOWAIT,
        .module = THIS_MODULE,
        .ctr    = zero_ctr,
        .map    = zero_map,
index 5b2f371..b3c3c8b 100644 (file)
@@ -1586,7 +1586,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
                ci.sector_count = bio_sectors(bio);
                while (ci.sector_count && !error) {
                        error = __split_and_process_non_flush(&ci);
-                       if (current->bio_list && ci.sector_count && !error) {
+                       if (ci.sector_count && !error) {
                                /*
                                 * Remainder must be passed to submit_bio_noacct()
                                 * so that it gets handled *after* bios already submitted
index b64d331..07e0ca2 100644 (file)
@@ -1119,7 +1119,7 @@ static inline void menelaus_rtc_init(struct menelaus_chip *m)
                menelaus_write_reg(MENELAUS_RTC_CTRL, m->rtc_control);
        }
 
-       err = rtc_register_device(m->rtc);
+       err = devm_rtc_register_device(m->rtc);
        if (err) {
                if (alarm) {
                        menelaus_remove_irq_work(MENELAUS_RTCALM_IRQ);
index 40fa994..f399edc 100644 (file)
@@ -629,10 +629,8 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024)
                ubi->bad_peb_limit = get_bad_peb_limit(ubi, max_beb_per1024);
        }
 
-       if (ubi->mtd->type == MTD_NORFLASH) {
-               ubi_assert(ubi->mtd->writesize == 1);
+       if (ubi->mtd->type == MTD_NORFLASH)
                ubi->nor_flash = 1;
-       }
 
        ubi->min_io_size = ubi->mtd->writesize;
        ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
@@ -1352,8 +1350,6 @@ static int bytes_str_to_int(const char *str)
                fallthrough;
        case 'K':
                result *= 1024;
-               if (endp[1] == 'i' && endp[2] == 'B')
-                       endp += 2;
        case '\0':
                break;
        default:
index 14d890b..2f3312c 100644 (file)
@@ -535,7 +535,14 @@ int ubi_io_sync_erase(struct ubi_device *ubi, int pnum, int torture)
                return -EROFS;
        }
 
-       if (ubi->nor_flash) {
+       /*
+        * If the flash is ECC-ed then we have to erase the ECC block before we
+        * can write to it. But the write is in preparation for an erase in the
+        * first place. This means we cannot zero out EC and VID before the
+        * erase and we just have to hope the flash starts erasing from the
+        * start of the page.
+        */
+       if (ubi->nor_flash && ubi->mtd->writesize == 1) {
                err = nor_erase_prepare(ubi, pnum);
                if (err)
                        return err;
index 052975e..4c41df6 100644 (file)
@@ -3072,6 +3072,7 @@ static int virtnet_probe(struct virtio_device *vdev)
                        dev_err(&vdev->dev,
                                "device MTU appears to have changed it is now %d < %d",
                                mtu, dev->min_mtu);
+                       err = -EINVAL;
                        goto free;
                }
 
index 884023e..d13b8d1 100644 (file)
@@ -244,10 +244,6 @@ config PCMCIA_VRC4171
        tristate "NEC VRC4171 Card Controllers support"
        depends on CPU_VR41XX && ISA && PCMCIA
 
-config PCMCIA_VRC4173
-       tristate "NEC VRC4173 CARDU support"
-       depends on CPU_VR41XX && PCI && PCMCIA
-
 config OMAP_CF
        tristate "OMAP CompactFlash Controller"
        depends on PCMCIA && ARCH_OMAP16XX
index 01779c5..d82c07c 100644 (file)
@@ -30,7 +30,6 @@ obj-$(CONFIG_PCMCIA_SA1100)                   += sa1100_cs.o
 obj-$(CONFIG_PCMCIA_SA1111)                    += sa1111_cs.o
 obj-$(CONFIG_PCMCIA_BCM63XX)                   += bcm63xx_pcmcia.o
 obj-$(CONFIG_PCMCIA_VRC4171)                   += vrc4171_card.o
-obj-$(CONFIG_PCMCIA_VRC4173)                   += vrc4173_cardu.o
 obj-$(CONFIG_OMAP_CF)                          += omap_cf.o
 obj-$(CONFIG_AT91_CF)                          += at91_cf.o
 obj-$(CONFIG_ELECTRA_CF)                       += electra_cf.o
index a7c7c7c..a6fbc70 100644 (file)
@@ -452,7 +452,7 @@ static int db1x_pcmcia_socket_probe(struct platform_device *pdev)
                printk(KERN_INFO "db1xxx-ss: unknown board %d!\n", bid);
                ret = -ENODEV;
                goto out0;
-       };
+       }
 
        /*
         * gather resources necessary and optional nice-to-haves to
index 35158cf..40a5cff 100644 (file)
@@ -229,6 +229,8 @@ static int electra_cf_probe(struct platform_device *ofdev)
 
        cf->socket.pci_irq = cf->irq;
 
+       status = -EINVAL;
+
        prop = of_get_property(np, "card-detect-gpio", NULL);
        if (!prop)
                goto fail1;
index d3ef553..f0b2c2d 100644 (file)
@@ -252,11 +252,15 @@ static int __init omap_cf_probe(struct platform_device *pdev)
        /* pcmcia layer only remaps "real" memory */
        cf->socket.io_offset = (unsigned long)
                        ioremap(cf->phys_cf + SZ_4K, SZ_2K);
-       if (!cf->socket.io_offset)
+       if (!cf->socket.io_offset) {
+               status = -ENOMEM;
                goto fail1;
+       }
 
-       if (!request_mem_region(cf->phys_cf, SZ_8K, driver_name))
+       if (!request_mem_region(cf->phys_cf, SZ_8K, driver_name)) {
+               status = -ENXIO;
                goto fail1;
+       }
 
        /* NOTE:  CF conflicts with MMC1 */
        omap_cfg_reg(W11_1610_CF_CD1);
diff --git a/drivers/pcmcia/vrc4173_cardu.c b/drivers/pcmcia/vrc4173_cardu.c
deleted file mode 100644 (file)
index 9fb0c3a..0000000
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
- * FILE NAME
- *     drivers/pcmcia/vrc4173_cardu.c
- *
- * BRIEF MODULE DESCRIPTION
- *     NEC VRC4173 CARDU driver for Socket Services
- *     (This device doesn't support CardBus. it is supporting only 16bit PC Card.)
- *
- * Copyright 2002,2003 Yoichi Yuasa <yuasa@linux-mips.org>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
- *  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- *  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-
-#include <asm/io.h>
-
-#include <pcmcia/ss.h>
-
-#include "vrc4173_cardu.h"
-
-MODULE_DESCRIPTION("NEC VRC4173 CARDU driver for Socket Services");
-MODULE_AUTHOR("Yoichi Yuasa <yuasa@linux-mips.org>");
-MODULE_LICENSE("GPL");
-
-static int vrc4173_cardu_slots;
-
-static vrc4173_socket_t cardu_sockets[CARDU_MAX_SOCKETS];
-
-extern struct socket_info_t *pcmcia_register_socket (int slot,
-                                                     struct pccard_operations *vtable,
-                                                     int use_bus_pm);
-extern void pcmcia_unregister_socket(struct socket_info_t *s);
-
-static inline uint8_t exca_readb(vrc4173_socket_t *socket, uint16_t offset)
-{
-       return readb(socket->base + EXCA_REGS_BASE + offset);
-}
-
-static inline uint16_t exca_readw(vrc4173_socket_t *socket, uint16_t offset)
-{
-       uint16_t val;
-
-       val = readb(socket->base + EXCA_REGS_BASE + offset);
-       val |= (u16)readb(socket->base + EXCA_REGS_BASE + offset + 1) << 8;
-
-       return val;
-}
-
-static inline void exca_writeb(vrc4173_socket_t *socket, uint16_t offset, uint8_t val)
-{
-       writeb(val, socket->base + EXCA_REGS_BASE + offset);
-}
-
-static inline void exca_writew(vrc4173_socket_t *socket, uint8_t offset, uint16_t val)
-{
-       writeb((u8)val, socket->base + EXCA_REGS_BASE + offset);
-       writeb((u8)(val >> 8), socket->base + EXCA_REGS_BASE + offset + 1);
-}
-
-static inline uint32_t cardbus_socket_readl(vrc4173_socket_t *socket, u16 offset)
-{
-       return readl(socket->base + CARDBUS_SOCKET_REGS_BASE + offset);
-}
-
-static inline void cardbus_socket_writel(vrc4173_socket_t *socket, u16 offset, uint32_t val)
-{
-       writel(val, socket->base + CARDBUS_SOCKET_REGS_BASE + offset);
-}
-
-static void cardu_pciregs_init(struct pci_dev *dev)
-{
-       u32 syscnt;
-       u16 brgcnt;
-       u8 devcnt;
-
-       pci_write_config_dword(dev, 0x1c, 0x10000000);
-       pci_write_config_dword(dev, 0x20, 0x17fff000);
-       pci_write_config_dword(dev, 0x2c, 0);
-       pci_write_config_dword(dev, 0x30, 0xfffc);
-
-       pci_read_config_word(dev, BRGCNT, &brgcnt);
-       brgcnt &= ~IREQ_INT;
-       pci_write_config_word(dev, BRGCNT, brgcnt);
-
-       pci_read_config_dword(dev, SYSCNT, &syscnt);
-       syscnt &= ~(BAD_VCC_REQ_DISB|PCPCI_EN|CH_ASSIGN_MASK|SUB_ID_WR_EN|PCI_CLK_RIN);
-       syscnt |= (CH_ASSIGN_NODMA|ASYN_INT_MODE);
-       pci_write_config_dword(dev, SYSCNT, syscnt);
-
-       pci_read_config_byte(dev, DEVCNT, &devcnt);
-       devcnt &= ~(ZOOM_VIDEO_EN|SR_PCI_INT_SEL_MASK|PCI_INT_MODE|IRQ_MODE);
-       devcnt |= (SR_PCI_INT_SEL_NONE|IFG);
-       pci_write_config_byte(dev, DEVCNT, devcnt);
-
-       pci_write_config_byte(dev, CHIPCNT, S_PREF_DISB);
-
-       pci_write_config_byte(dev, SERRDIS, 0);
-}
-
-static int cardu_init(unsigned int slot)
-{
-       vrc4173_socket_t *socket = &cardu_sockets[slot];
-
-       cardu_pciregs_init(socket->dev);
-
-       /* CARD_SC bits are cleared by reading CARD_SC. */
-       exca_writeb(socket, GLO_CNT, 0);
-
-       socket->cap.features |= SS_CAP_PCCARD | SS_CAP_PAGE_REGS;
-       socket->cap.irq_mask = 0;
-       socket->cap.map_size = 0x1000;
-       socket->cap.pci_irq  = socket->dev->irq;
-       socket->events = 0;
-       spin_lock_init(socket->event_lock);
-
-       /* Enable PC Card status interrupts */
-       exca_writeb(socket, CARD_SCI, CARD_DT_EN|RDY_EN|BAT_WAR_EN|BAT_DEAD_EN);
-
-       return 0;
-}
-
-static int cardu_register_callback(unsigned int sock,
-                                           void (*handler)(void *, unsigned int),
-                                           void * info)
-{
-       vrc4173_socket_t *socket = &cardu_sockets[sock];
-
-       socket->handler = handler;
-       socket->info = info;
-
-       return 0;
-}
-
-static int cardu_inquire_socket(unsigned int sock, socket_cap_t *cap)
-{
-       vrc4173_socket_t *socket = &cardu_sockets[sock];
-
-       *cap = socket->cap;
-
-       return 0;
-}
-
-static int cardu_get_status(unsigned int sock, u_int *value)
-{
-       vrc4173_socket_t *socket = &cardu_sockets[sock];
-       uint32_t state;
-       uint8_t status;
-       u_int val = 0;
-
-       status = exca_readb(socket, IF_STATUS);
-       if (status & CARD_PWR) val |= SS_POWERON;
-       if (status & READY) val |= SS_READY;
-       if (status & CARD_WP) val |= SS_WRPROT;
-       if ((status & (CARD_DETECT1|CARD_DETECT2)) == (CARD_DETECT1|CARD_DETECT2))
-               val |= SS_DETECT;
-       if (exca_readb(socket, INT_GEN_CNT) & CARD_TYPE_IO) {
-               if (status & STSCHG) val |= SS_STSCHG;
-       } else {
-               status &= BV_DETECT_MASK;
-               if (status != BV_DETECT_GOOD) {
-                       if (status == BV_DETECT_WARN) val |= SS_BATWARN;
-                       else val |= SS_BATDEAD;
-               }
-       }
-
-       state = cardbus_socket_readl(socket, SKT_PRE_STATE);
-       if (state & VOL_3V_CARD_DT) val |= SS_3VCARD;
-       if (state & VOL_XV_CARD_DT) val |= SS_XVCARD;
-       if (state & CB_CARD_DT) val |= SS_CARDBUS;
-       if (!(state &
-             (VOL_YV_CARD_DT|VOL_XV_CARD_DT|VOL_3V_CARD_DT|VOL_5V_CARD_DT|CCD20|CCD10)))
-               val |= SS_PENDING;
-
-       *value = val;
-
-       return 0;
-}
-
-static inline uint8_t set_Vcc_value(u_char Vcc)
-{
-       switch (Vcc) {
-       case 33:
-               return VCC_3V;
-       case 50:
-               return VCC_5V;
-       }
-
-       return VCC_0V;
-}
-
-static inline uint8_t set_Vpp_value(u_char Vpp)
-{
-       switch (Vpp) {
-       case 33:
-       case 50:
-               return VPP_VCC;
-       case 120:
-               return VPP_12V;
-       }
-
-       return VPP_0V;
-}
-
-static int cardu_set_socket(unsigned int sock, socket_state_t *state)
-{
-       vrc4173_socket_t *socket = &cardu_sockets[sock];
-       uint8_t val;
-
-       if (((state->Vpp == 33) || (state->Vpp == 50)) && (state->Vpp != state->Vcc))
-                       return -EINVAL;
-
-       val = set_Vcc_value(state->Vcc);
-       val |= set_Vpp_value(state->Vpp);
-       if (state->flags & SS_OUTPUT_ENA) val |= CARD_OUT_EN;
-       exca_writeb(socket, PWR_CNT, val);
-
-       val = exca_readb(socket, INT_GEN_CNT) & CARD_REST0;
-       if (state->flags & SS_RESET) val &= ~CARD_REST0;
-       else val |= CARD_REST0;
-       if (state->flags & SS_IOCARD) val |= CARD_TYPE_IO;
-       exca_writeb(socket, INT_GEN_CNT, val);
-
-       return 0;
-}
-
-static int cardu_get_io_map(unsigned int sock, struct pccard_io_map *io)
-{
-       vrc4173_socket_t *socket = &cardu_sockets[sock];
-       uint8_t ioctl, window;
-       u_char map;
-
-       map = io->map;
-       if (map > 1)
-               return -EINVAL;
-
-       io->start = exca_readw(socket, IO_WIN_SA(map));
-       io->stop = exca_readw(socket, IO_WIN_EA(map));
-
-       ioctl = exca_readb(socket, IO_WIN_CNT);
-       window = exca_readb(socket, ADR_WIN_EN);
-       io->flags  = (window & IO_WIN_EN(map)) ? MAP_ACTIVE : 0;
-       if (ioctl & IO_WIN_DATA_AUTOSZ(map))
-               io->flags |= MAP_AUTOSZ;
-       else if (ioctl & IO_WIN_DATA_16BIT(map))
-               io->flags |= MAP_16BIT;
-
-       return 0;
-}
-
-static int cardu_set_io_map(unsigned int sock, struct pccard_io_map *io)
-{
-       vrc4173_socket_t *socket = &cardu_sockets[sock];
-       uint16_t ioctl;
-       uint8_t window, enable;
-       u_char map;
-
-       map = io->map;
-       if (map > 1)
-               return -EINVAL;
-
-       window = exca_readb(socket, ADR_WIN_EN);
-       enable = IO_WIN_EN(map);
-
-       if (window & enable) {
-               window &= ~enable;
-               exca_writeb(socket, ADR_WIN_EN, window);
-       }
-
-       exca_writew(socket, IO_WIN_SA(map), io->start);
-       exca_writew(socket, IO_WIN_EA(map), io->stop);
-
-       ioctl = exca_readb(socket, IO_WIN_CNT) & ~IO_WIN_CNT_MASK(map);
-       if (io->flags & MAP_AUTOSZ) ioctl |= IO_WIN_DATA_AUTOSZ(map);
-       else if (io->flags & MAP_16BIT) ioctl |= IO_WIN_DATA_16BIT(map);
-       exca_writeb(socket, IO_WIN_CNT, ioctl);
-
-       if (io->flags & MAP_ACTIVE)
-               exca_writeb(socket, ADR_WIN_EN, window | enable);
-
-       return 0;
-}
-
-static int cardu_get_mem_map(unsigned int sock, struct pccard_mem_map *mem)
-{
-       vrc4173_socket_t *socket = &cardu_sockets[sock];
-       uint32_t start, stop, offset, page;
-       uint8_t window;
-       u_char map;
-
-       map = mem->map;
-       if (map > 4)
-               return -EINVAL;
-
-       window = exca_readb(socket, ADR_WIN_EN);
-       mem->flags = (window & MEM_WIN_EN(map)) ? MAP_ACTIVE : 0;
-
-       start = exca_readw(socket, MEM_WIN_SA(map));
-       mem->flags |= (start & MEM_WIN_DSIZE) ? MAP_16BIT : 0;
-       start = (start & 0x0fff) << 12;
-
-       stop = exca_readw(socket, MEM_WIN_EA(map));
-       stop = ((stop & 0x0fff) << 12) + 0x0fff;
-
-       offset = exca_readw(socket, MEM_WIN_OA(map));
-       mem->flags |= (offset & MEM_WIN_WP) ? MAP_WRPROT : 0;
-       mem->flags |= (offset & MEM_WIN_REGSET) ? MAP_ATTRIB : 0;
-       offset = ((offset & 0x3fff) << 12) + start;
-       mem->card_start = offset & 0x03ffffff;
-
-       page = exca_readb(socket, MEM_WIN_SAU(map)) << 24;
-       mem->sys_start = start + page;
-       mem->sys_stop = start + page;
-
-       return 0;
-}
-
-static int cardu_set_mem_map(unsigned int sock, struct pccard_mem_map *mem)
-{
-       vrc4173_socket_t *socket = &cardu_sockets[sock];
-       uint16_t value;
-       uint8_t window, enable;
-       u_long sys_start, sys_stop, card_start;
-       u_char map;
-
-       map = mem->map;
-       sys_start = mem->sys_start;
-       sys_stop = mem->sys_stop;
-       card_start = mem->card_start;
-
-       if (map > 4 || sys_start > sys_stop || ((sys_start ^ sys_stop) >> 24) ||
-           (card_start >> 26))
-               return -EINVAL;
-
-       window = exca_readb(socket, ADR_WIN_EN);
-       enable = MEM_WIN_EN(map);
-       if (window & enable) {
-               window &= ~enable;
-               exca_writeb(socket, ADR_WIN_EN, window);
-       }
-
-       exca_writeb(socket, MEM_WIN_SAU(map), sys_start >> 24);
-
-       value = (sys_start >> 12) & 0x0fff;
-       if (mem->flags & MAP_16BIT) value |= MEM_WIN_DSIZE;
-       exca_writew(socket, MEM_WIN_SA(map), value);
-
-       value = (sys_stop >> 12) & 0x0fff;
-       exca_writew(socket, MEM_WIN_EA(map), value);
-
-       value = ((card_start - sys_start) >> 12) & 0x3fff;
-       if (mem->flags & MAP_WRPROT) value |= MEM_WIN_WP;
-       if (mem->flags & MAP_ATTRIB) value |= MEM_WIN_REGSET;
-       exca_writew(socket, MEM_WIN_OA(map), value);
-
-       if (mem->flags & MAP_ACTIVE)
-               exca_writeb(socket, ADR_WIN_EN, window | enable);
-
-       return 0;
-}
-
-static void cardu_proc_setup(unsigned int sock, struct proc_dir_entry *base)
-{
-}
-
-static struct pccard_operations cardu_operations = {
-       .init                   = cardu_init,
-       .register_callback      = cardu_register_callback,
-       .inquire_socket         = cardu_inquire_socket,
-       .get_status             = cardu_get_status,
-       .set_socket             = cardu_set_socket,
-       .get_io_map             = cardu_get_io_map,
-       .set_io_map             = cardu_set_io_map,
-       .get_mem_map            = cardu_get_mem_map,
-       .set_mem_map            = cardu_set_mem_map,
-       .proc_setup             = cardu_proc_setup,
-};
-
-static void cardu_bh(void *data)
-{
-       vrc4173_socket_t *socket = (vrc4173_socket_t *)data;
-       uint16_t events;
-
-       spin_lock_irq(&socket->event_lock);
-       events = socket->events;
-       socket->events = 0;
-       spin_unlock_irq(&socket->event_lock);
-
-       if (socket->handler)
-               socket->handler(socket->info, events);
-}
-
-static uint16_t get_events(vrc4173_socket_t *socket)
-{
-       uint16_t events = 0;
-       uint8_t csc, status;
-
-       status = exca_readb(socket, IF_STATUS);
-       csc = exca_readb(socket, CARD_SC);
-       if ((csc & CARD_DT_CHG) &&
-           ((status & (CARD_DETECT1|CARD_DETECT2)) == (CARD_DETECT1|CARD_DETECT2)))
-               events |= SS_DETECT;
-
-       if ((csc & RDY_CHG) && (status & READY))
-               events |= SS_READY;
-
-       if (exca_readb(socket, INT_GEN_CNT) & CARD_TYPE_IO) {
-               if ((csc & BAT_DEAD_ST_CHG) && (status & STSCHG))
-                       events |= SS_STSCHG;
-       } else {
-               if (csc & (BAT_WAR_CHG|BAT_DEAD_ST_CHG)) {
-                       if ((status & BV_DETECT_MASK) != BV_DETECT_GOOD) {
-                               if (status == BV_DETECT_WARN) events |= SS_BATWARN;
-                               else events |= SS_BATDEAD;
-                       }
-               }
-       }
-
-       return events;
-}
-
-static void cardu_interrupt(int irq, void *dev_id)
-{
-       vrc4173_socket_t *socket = (vrc4173_socket_t *)dev_id;
-       uint16_t events;
-
-       INIT_WORK(&socket->tq_work, cardu_bh, socket);
-
-       events = get_events(socket);
-       if (events) {
-               spin_lock(&socket->event_lock);
-               socket->events |= events;
-               spin_unlock(&socket->event_lock);
-               schedule_work(&socket->tq_work);
-       }
-}
-
-static int vrc4173_cardu_probe(struct pci_dev *dev,
-                                         const struct pci_device_id *ent)
-{
-       vrc4173_socket_t *socket;
-       unsigned long start, len, flags;
-       int slot, err, ret;
-
-       slot = vrc4173_cardu_slots++;
-       socket = &cardu_sockets[slot];
-       if (socket->noprobe != 0)
-               return -EBUSY;
-
-       sprintf(socket->name, "NEC VRC4173 CARDU%1d", slot+1);
-
-       if ((err = pci_enable_device(dev)) < 0)
-               return err;
-
-       start = pci_resource_start(dev, 0);
-       if (start == 0) {
-               ret = -ENODEV;
-               goto disable;
-       }
-
-       len = pci_resource_len(dev, 0);
-       if (len == 0) {
-               ret = -ENODEV;
-               goto disable;
-       }
-
-       flags = pci_resource_flags(dev, 0);
-       if ((flags & IORESOURCE_MEM) == 0) {
-               ret = -EBUSY;
-               goto disable;
-       }
-
-       err = pci_request_regions(dev, socket->name);
-       if (err < 0) {
-               ret = err;
-               goto disable;
-       }
-
-       socket->base = ioremap(start, len);
-       if (socket->base == NULL) {
-               ret = -ENODEV;
-               goto release;
-       }
-
-       socket->dev = dev;
-
-       socket->pcmcia_socket = pcmcia_register_socket(slot, &cardu_operations, 1);
-       if (socket->pcmcia_socket == NULL) {
-               ret =  -ENOMEM;
-               goto unmap;
-       }
-
-       if (request_irq(dev->irq, cardu_interrupt, IRQF_SHARED, socket->name, socket) < 0) {
-               ret = -EBUSY;
-               goto unregister;
-       }
-
-       printk(KERN_INFO "%s at %#08lx, IRQ %d\n", socket->name, start, dev->irq);
-
-       return 0;
-
-unregister:
-       pcmcia_unregister_socket(socket->pcmcia_socket);
-       socket->pcmcia_socket = NULL;
-unmap:
-       iounmap(socket->base);
-       socket->base = NULL;
-release:
-       pci_release_regions(dev);
-disable:
-       pci_disable_device(dev);
-       return ret;
-}
-
-static int vrc4173_cardu_setup(char *options)
-{
-       if (options == NULL || *options == '\0')
-               return 1;
-
-       if (strncmp(options, "cardu1:", 7) == 0) {
-               options += 7;
-               if (*options != '\0') {
-                       if (strncmp(options, "noprobe", 7) == 0) {
-                               cardu_sockets[CARDU1].noprobe = 1;
-                               options += 7;
-                       }
-
-                       if (*options != ',')
-                               return 1;
-               } else
-                       return 1;
-       }
-
-       if (strncmp(options, "cardu2:", 7) == 0) {
-               options += 7;
-               if ((*options != '\0') && (strncmp(options, "noprobe", 7) == 0))
-                       cardu_sockets[CARDU2].noprobe = 1;
-       }
-
-       return 1;
-}
-
-__setup("vrc4173_cardu=", vrc4173_cardu_setup);
-
-static const struct pci_device_id vrc4173_cardu_id_table[] = {
-       { PCI_DEVICE(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_NAPCCARD) },
-        {0, }
-};
-
-static struct pci_driver vrc4173_cardu_driver = {
-       .name           = "NEC VRC4173 CARDU",
-       .probe          = vrc4173_cardu_probe,
-       .id_table       = vrc4173_cardu_id_table,
-};
-
-static int vrc4173_cardu_init(void)
-{
-       vrc4173_cardu_slots = 0;
-
-       return pci_register_driver(&vrc4173_cardu_driver);
-}
-
-static void vrc4173_cardu_exit(void)
-{
-       pci_unregister_driver(&vrc4173_cardu_driver);
-}
-
-module_init(vrc4173_cardu_init);
-module_exit(vrc4173_cardu_exit);
-MODULE_DEVICE_TABLE(pci, vrc4173_cardu_id_table);
diff --git a/drivers/pcmcia/vrc4173_cardu.h b/drivers/pcmcia/vrc4173_cardu.h
deleted file mode 100644 (file)
index a7d9601..0000000
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * FILE NAME
- *     drivers/pcmcia/vrc4173_cardu.h
- *
- * BRIEF MODULE DESCRIPTION
- *     Include file for NEC VRC4173 CARDU.
- *
- * Copyright 2002 Yoichi Yuasa <yuasa@linux-mips.org>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License as published by the
- *  Free Software Foundation; either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
- *  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- *  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#ifndef _VRC4173_CARDU_H
-#define _VRC4173_CARDU_H
-
-#include <linux/pci.h>
-
-#include <pcmcia/ss.h>
-
-#define CARDU_MAX_SOCKETS      2
-#define CARDU1                 0
-#define CARDU2                 1
-
-/*
- * PCI Configuration Registers
- */
-#define BRGCNT                 0x3e
- #define POST_WR_EN            0x0400
- #define MEM1_PREF_EN          0x0200
- #define MEM0_PREF_EN          0x0100
- #define IREQ_INT              0x0080
- #define CARD_RST              0x0040
- #define MABORT_MODE           0x0020
- #define VGA_EN                        0x0008
- #define ISA_EN                        0x0004
- #define SERR_EN               0x0002
- #define PERR_EN               0x0001
-
-#define SYSCNT                 0x80
- #define BAD_VCC_REQ_DISB      0x00200000
- #define PCPCI_EN              0x00080000
- #define CH_ASSIGN_MASK                0x00070000
- #define CH_ASSIGN_NODMA       0x00040000
- #define SUB_ID_WR_EN          0x00000008
- #define ASYN_INT_MODE         0x00000004
- #define PCI_CLK_RIN           0x00000002
-
-#define DEVCNT                 0x91
- #define ZOOM_VIDEO_EN         0x40
- #define SR_PCI_INT_SEL_MASK   0x18
- #define SR_PCI_INT_SEL_NONE   0x00
- #define PCI_INT_MODE          0x04
- #define IRQ_MODE              0x02
- #define IFG                   0x01
-
-#define CHIPCNT                        0x9c
- #define S_PREF_DISB           0x10
-
-#define SERRDIS                        0x9f
- #define SERR_DIS_MAB          0x10
- #define SERR_DIS_TAB          0x08
- #define SERR_DIS_DT_PERR      0x04
-
-/*
- * ExCA Registers
- */
-#define EXCA_REGS_BASE         0x800
-#define EXCA_REGS_SIZE         0x800
-
-#define ID_REV                 0x000
- #define IF_TYPE_16BIT         0x80
-
-#define IF_STATUS              0x001
- #define CARD_PWR              0x40
- #define READY                 0x20
- #define CARD_WP               0x10
- #define CARD_DETECT2          0x08
- #define CARD_DETECT1          0x04
- #define BV_DETECT_MASK                0x03
- #define BV_DETECT_GOOD                0x03    /* Memory card */
- #define BV_DETECT_WARN                0x02
- #define BV_DETECT_BAD1                0x01
- #define BV_DETECT_BAD0                0x00
- #define STSCHG                        0x02    /* I/O card */
- #define SPKR                  0x01
-
-#define PWR_CNT                        0x002
- #define CARD_OUT_EN           0x80
- #define VCC_MASK              0x18
- #define VCC_3V                        0x18
- #define VCC_5V                        0x10
- #define VCC_0V                        0x00
- #define VPP_MASK              0x03
- #define VPP_12V               0x02
- #define VPP_VCC               0x01
- #define VPP_0V                        0x00
-
-#define INT_GEN_CNT            0x003
- #define CARD_REST0            0x40
- #define CARD_TYPE_MASK                0x20
- #define CARD_TYPE_IO          0x20
- #define CARD_TYPE_MEM         0x00
-
-#define CARD_SC                        0x004
- #define CARD_DT_CHG           0x08
- #define RDY_CHG               0x04
- #define BAT_WAR_CHG           0x02
- #define BAT_DEAD_ST_CHG       0x01
-
-#define CARD_SCI               0x005
- #define CARD_DT_EN            0x08
- #define RDY_EN                        0x04
- #define BAT_WAR_EN            0x02
- #define BAT_DEAD_EN           0x01
-
-#define ADR_WIN_EN             0x006
- #define IO_WIN_EN(x)          (0x40 << (x))
- #define MEM_WIN_EN(x)         (0x01 << (x))
-
-#define IO_WIN_CNT             0x007
- #define IO_WIN_CNT_MASK(x)    (0x03 << ((x) << 2))
- #define IO_WIN_DATA_AUTOSZ(x) (0x02 << ((x) << 2))
- #define IO_WIN_DATA_16BIT(x)  (0x01 << ((x) << 2))
-
-#define IO_WIN_SA(x)           (0x008 + ((x) << 2))
-#define IO_WIN_EA(x)           (0x00a + ((x) << 2))
-
-#define MEM_WIN_SA(x)          (0x010 + ((x) << 3))
- #define MEM_WIN_DSIZE         0x8000
-
-#define MEM_WIN_EA(x)          (0x012 + ((x) << 3))
-
-#define MEM_WIN_OA(x)          (0x014 + ((x) << 3))
- #define MEM_WIN_WP            0x8000
- #define MEM_WIN_REGSET                0x4000
-
-#define GEN_CNT                        0x016
- #define VS2_STATUS            0x80
- #define VS1_STATUS            0x40
- #define EXCA_REG_RST_EN       0x02
-
-#define GLO_CNT                        0x01e
- #define FUN_INT_LEV           0x08
- #define INT_WB_CLR            0x04
- #define CSC_INT_LEV           0x02
-
-#define IO_WIN_OAL(x)          (0x036 + ((x) << 1))
-#define IO_WIN_OAH(x)          (0x037 + ((x) << 1))
-
-#define MEM_WIN_SAU(x)         (0x040 + (x))
-
-#define IO_SETUP_TIM           0x080
-#define IO_CMD_TIM             0x081
-#define IO_HOLD_TIM            0x082
-#define MEM_SETUP_TIM(x)       (0x084 + ((x) << 2))
-#define MEM_CMD_TIM(x)         (0x085 + ((x) << 2))
-#define MEM_HOLD_TIM(x)                (0x086 + ((x) << 2))
- #define TIM_CLOCKS(x)         ((x) - 1)
-
-#define MEM_TIM_SEL1           0x08c
-#define MEM_TIM_SEL2           0x08d
- #define MEM_WIN_TIMSEL1(x)    (0x03 << (((x) & 3) << 1))
-
-#define MEM_WIN_PWEN           0x091
- #define POSTWEN               0x01
-
-/*
- * CardBus Socket Registers
- */
-#define CARDBUS_SOCKET_REGS_BASE       0x000
-#define CARDBUS_SOCKET_REGS_SIZE       0x800
-
-#define SKT_EV                 0x000
- #define POW_CYC_EV            0x00000008
- #define CCD2_EV               0x00000004
- #define CCD1_EV               0x00000002
- #define CSTSCHG_EV            0x00000001
-
-#define SKT_MASK               0x004
- #define POW_CYC_MASK          0x00000008
- #define CCD_MASK              0x00000006
- #define CSC_MASK              0x00000001
-
-#define SKT_PRE_STATE          0x008
-#define SKT_FORCE_EV           0x00c
- #define VOL_3V_SKT            0x20000000
- #define VOL_5V_SKT            0x10000000
- #define CVS_TEST              0x00004000
- #define VOL_YV_CARD_DT                0x00002000
- #define VOL_XV_CARD_DT                0x00001000
- #define VOL_3V_CARD_DT                0x00000800
- #define VOL_5V_CARD_DT                0x00000400
- #define BAD_VCC_REQ           0x00000200
- #define DATA_LOST             0x00000100
- #define NOT_A_CARD            0x00000080
- #define CREADY                        0x00000040
- #define CB_CARD_DT            0x00000020
- #define R2_CARD_DT            0x00000010
- #define POW_UP                        0x00000008
- #define CCD20                 0x00000004
- #define CCD10                 0x00000002
- #define CSTSCHG               0x00000001
-
-#define SKT_CNT                        0x010
- #define STP_CLK_EN            0x00000080
- #define VCC_CNT_MASK          0x00000070
- #define VCC_CNT_3V            0x00000030
- #define VCC_CNT_5V            0x00000020
- #define VCC_CNT_0V            0x00000000
- #define VPP_CNT_MASK          0x00000007
- #define VPP_CNT_3V            0x00000003
- #define VPP_CNT_5V            0x00000002
- #define VPP_CNT_12V           0x00000001
- #define VPP_CNT_0V            0x00000000
-
-typedef struct vrc4173_socket {
-       int noprobe;
-       struct pci_dev *dev;
-       void *base;
-       void (*handler)(void *, unsigned int);
-       void *info;
-       socket_cap_t cap;
-       spinlock_t event_lock;
-       uint16_t events;
-       struct socket_info_t *pcmcia_socket;
-       struct work_struct tq_work;
-       char name[20];
-} vrc4173_socket_t;
-
-#endif /* _VRC4173_CARDU_H */
index 0ecee8b..7c92a6e 100644 (file)
@@ -742,12 +742,16 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev,
                 * Sensor events need to be parsed by the sensor sub-device.
                 * Defer them, and don't report the wakeup here.
                 */
-               if (event_type == EC_MKBP_EVENT_SENSOR_FIFO)
-                       *wake_event = false;
-               /* Masked host-events should not count as wake events. */
-               else if (host_event &&
-                        !(host_event & ec_dev->host_event_wake_mask))
+               if (event_type == EC_MKBP_EVENT_SENSOR_FIFO) {
                        *wake_event = false;
+               } else if (host_event) {
+                       /* rtc_update_irq() already handles wakeup events. */
+                       if (host_event & EC_HOST_EVENT_MASK(EC_HOST_EVENT_RTC))
+                               *wake_event = false;
+                       /* Masked host-events should not count as wake events. */
+                       if (!(host_event & ec_dev->host_event_wake_mask))
+                               *wake_event = false;
+               }
        }
 
        return ret;
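
The cros_ec_proto.c hunk above restructures the wake-event decision: sensor FIFO events are still deferred, and host events are now suppressed as wakeup sources both when they fall outside the wake mask and when they are RTC alarms, which rtc_update_irq() already reports. The decision logic, extracted as a runnable sketch (the RTC bit position here is illustrative, not the real EC constant):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define HOST_EVENT_RTC_MASK     (UINT64_C(1) << 26)     /* illustrative bit only */

    /* Decide whether an MKBP event should count as a wakeup source. */
    static bool event_is_wake(bool sensor_fifo, uint64_t host_event,
                              uint64_t wake_mask)
    {
            if (sensor_fifo)
                    return false;           /* deferred to the sensor sub-device */

            if (host_event) {
                    if (host_event & HOST_EVENT_RTC_MASK)
                            return false;   /* rtc_update_irq() handles the wakeup */
                    if (!(host_event & wake_mask))
                            return false;   /* masked events do not wake us */
            }

            return true;
    }

    int main(void)
    {
            /* RTC alarm with everything unmasked -> still not a wake event. */
            printf("%d\n", event_is_wake(false, HOST_EVENT_RTC_MASK, ~UINT64_C(0)));
            return 0;
    }
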
index 8111ed1..c438686 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_data/cros_ec_commands.h>
@@ -14,6 +15,7 @@
 #include <linux/platform_data/cros_usbpd_notify.h>
 #include <linux/platform_device.h>
 #include <linux/usb/pd.h>
+#include <linux/usb/pd_vdo.h>
 #include <linux/usb/typec.h>
 #include <linux/usb/typec_altmode.h>
 #include <linux/usb/typec_dp.h>
@@ -30,6 +32,12 @@ enum {
        CROS_EC_ALTMODE_MAX,
 };
 
+/* Container for altmode pointer nodes. */
+struct cros_typec_altmode_node {
+       struct typec_altmode *amode;
+       struct list_head list;
+};
+
 /* Per port data. */
 struct cros_typec_port {
        struct typec_port *port;
@@ -48,6 +56,11 @@ struct cros_typec_port {
 
        /* Port alt modes. */
        struct typec_altmode p_altmode[CROS_EC_ALTMODE_MAX];
+
+       /* Flag indicating that PD discovery data parsing is completed. */
+       bool disc_done;
+       struct ec_response_typec_discovery *sop_disc;
+       struct list_head partner_mode_list;
 };
 
 /* Platform-specific data for the Chrome OS EC Type C controller. */
@@ -60,6 +73,7 @@ struct cros_typec_data {
        struct cros_typec_port *ports[EC_USB_PD_MAX_PORTS];
        struct notifier_block nb;
        struct work_struct port_work;
+       bool typec_cmd_supported;
 };
 
 static int cros_typec_parse_port_props(struct typec_capability *cap,
@@ -166,11 +180,25 @@ static int cros_typec_add_partner(struct cros_typec_data *typec, int port_num,
        return ret;
 }
 
+static void cros_typec_unregister_altmodes(struct cros_typec_data *typec, int port_num)
+{
+       struct cros_typec_port *port = typec->ports[port_num];
+       struct cros_typec_altmode_node *node, *tmp;
+
+       list_for_each_entry_safe(node, tmp, &port->partner_mode_list, list) {
+               list_del(&node->list);
+               typec_unregister_altmode(node->amode);
+               devm_kfree(typec->dev, node);
+       }
+}
+
 static void cros_typec_remove_partner(struct cros_typec_data *typec,
                                     int port_num)
 {
        struct cros_typec_port *port = typec->ports[port_num];
 
+       cros_typec_unregister_altmodes(typec, port_num);
+
        port->state.alt = NULL;
        port->state.mode = TYPEC_STATE_USB;
        port->state.data = NULL;
@@ -181,6 +209,8 @@ static void cros_typec_remove_partner(struct cros_typec_data *typec,
 
        typec_unregister_partner(port->partner);
        port->partner = NULL;
+       memset(&port->p_identity, 0, sizeof(port->p_identity));
+       port->disc_done = false;
 }
 
 static void cros_unregister_ports(struct cros_typec_data *typec)
@@ -190,7 +220,10 @@ static void cros_unregister_ports(struct cros_typec_data *typec)
        for (i = 0; i < typec->num_ports; i++) {
                if (!typec->ports[i])
                        continue;
-               cros_typec_remove_partner(typec, i);
+
+               if (typec->ports[i]->partner)
+                       cros_typec_remove_partner(typec, i);
+
                usb_role_switch_put(typec->ports[i]->role_sw);
                typec_switch_put(typec->ports[i]->ori_sw);
                typec_mux_put(typec->ports[i]->mux);
@@ -289,6 +322,14 @@ static int cros_typec_init_ports(struct cros_typec_data *typec)
                                port_num);
 
                cros_typec_register_port_altmodes(typec, port_num);
+
+               cros_port->sop_disc = devm_kzalloc(dev, EC_PROTO2_MAX_RESPONSE_SIZE, GFP_KERNEL);
+               if (!cros_port->sop_disc) {
+                       ret = -ENOMEM;
+                       goto unregister_ports;
+               }
+
+               INIT_LIST_HEAD(&cros_port->partner_mode_list);
        }
 
        return 0;
@@ -329,74 +370,6 @@ static int cros_typec_ec_command(struct cros_typec_data *typec,
        return ret;
 }
 
-static void cros_typec_set_port_params_v0(struct cros_typec_data *typec,
-               int port_num, struct ec_response_usb_pd_control *resp)
-{
-       struct typec_port *port = typec->ports[port_num]->port;
-       enum typec_orientation polarity;
-
-       if (!resp->enabled)
-               polarity = TYPEC_ORIENTATION_NONE;
-       else if (!resp->polarity)
-               polarity = TYPEC_ORIENTATION_NORMAL;
-       else
-               polarity = TYPEC_ORIENTATION_REVERSE;
-
-       typec_set_pwr_role(port, resp->role ? TYPEC_SOURCE : TYPEC_SINK);
-       typec_set_orientation(port, polarity);
-}
-
-static void cros_typec_set_port_params_v1(struct cros_typec_data *typec,
-               int port_num, struct ec_response_usb_pd_control_v1 *resp)
-{
-       struct typec_port *port = typec->ports[port_num]->port;
-       enum typec_orientation polarity;
-       bool pd_en;
-       int ret;
-
-       if (!(resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED))
-               polarity = TYPEC_ORIENTATION_NONE;
-       else if (!resp->polarity)
-               polarity = TYPEC_ORIENTATION_NORMAL;
-       else
-               polarity = TYPEC_ORIENTATION_REVERSE;
-       typec_set_orientation(port, polarity);
-       typec_set_data_role(port, resp->role & PD_CTRL_RESP_ROLE_DATA ?
-                       TYPEC_HOST : TYPEC_DEVICE);
-       typec_set_pwr_role(port, resp->role & PD_CTRL_RESP_ROLE_POWER ?
-                       TYPEC_SOURCE : TYPEC_SINK);
-       typec_set_vconn_role(port, resp->role & PD_CTRL_RESP_ROLE_VCONN ?
-                       TYPEC_SOURCE : TYPEC_SINK);
-
-       /* Register/remove partners when a connect/disconnect occurs. */
-       if (resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED) {
-               if (typec->ports[port_num]->partner)
-                       return;
-
-               pd_en = resp->enabled & PD_CTRL_RESP_ENABLED_PD_CAPABLE;
-               ret = cros_typec_add_partner(typec, port_num, pd_en);
-               if (ret)
-                       dev_warn(typec->dev,
-                                "Failed to register partner on port: %d\n",
-                                port_num);
-       } else {
-               if (!typec->ports[port_num]->partner)
-                       return;
-               cros_typec_remove_partner(typec, port_num);
-       }
-}
-
-static int cros_typec_get_mux_info(struct cros_typec_data *typec, int port_num,
-                                  struct ec_response_usb_pd_mux_info *resp)
-{
-       struct ec_params_usb_pd_mux_info req = {
-               .port = port_num,
-       };
-
-       return cros_typec_ec_command(typec, 0, EC_CMD_USB_PD_MUX_INFO, &req,
-                                    sizeof(req), resp, sizeof(*resp));
-}
-
 static int cros_typec_usb_safe_state(struct cros_typec_port *port)
 {
        port->state.mode = TYPEC_STATE_SAFE;
@@ -563,15 +536,210 @@ static int cros_typec_configure_mux(struct cros_typec_data *typec, int port_num,
                port->state.mode = TYPEC_STATE_USB;
                ret = typec_mux_set(port->mux, &port->state);
        } else {
-               dev_info(typec->dev,
-                        "Unsupported mode requested, mux flags: %x\n",
-                        mux_flags);
-               ret = -ENOTSUPP;
+               dev_dbg(typec->dev,
+                       "Unrecognized mode requested, mux flags: %x\n",
+                       mux_flags);
+       }
+
+       return ret;
+}
+
+static void cros_typec_set_port_params_v0(struct cros_typec_data *typec,
+               int port_num, struct ec_response_usb_pd_control *resp)
+{
+       struct typec_port *port = typec->ports[port_num]->port;
+       enum typec_orientation polarity;
+
+       if (!resp->enabled)
+               polarity = TYPEC_ORIENTATION_NONE;
+       else if (!resp->polarity)
+               polarity = TYPEC_ORIENTATION_NORMAL;
+       else
+               polarity = TYPEC_ORIENTATION_REVERSE;
+
+       typec_set_pwr_role(port, resp->role ? TYPEC_SOURCE : TYPEC_SINK);
+       typec_set_orientation(port, polarity);
+}
+
+static void cros_typec_set_port_params_v1(struct cros_typec_data *typec,
+               int port_num, struct ec_response_usb_pd_control_v1 *resp)
+{
+       struct typec_port *port = typec->ports[port_num]->port;
+       enum typec_orientation polarity;
+       bool pd_en;
+       int ret;
+
+       if (!(resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED))
+               polarity = TYPEC_ORIENTATION_NONE;
+       else if (!resp->polarity)
+               polarity = TYPEC_ORIENTATION_NORMAL;
+       else
+               polarity = TYPEC_ORIENTATION_REVERSE;
+       typec_set_orientation(port, polarity);
+       typec_set_data_role(port, resp->role & PD_CTRL_RESP_ROLE_DATA ?
+                       TYPEC_HOST : TYPEC_DEVICE);
+       typec_set_pwr_role(port, resp->role & PD_CTRL_RESP_ROLE_POWER ?
+                       TYPEC_SOURCE : TYPEC_SINK);
+       typec_set_vconn_role(port, resp->role & PD_CTRL_RESP_ROLE_VCONN ?
+                       TYPEC_SOURCE : TYPEC_SINK);
+
+       /* Register/remove partners when a connect/disconnect occurs. */
+       if (resp->enabled & PD_CTRL_RESP_ENABLED_CONNECTED) {
+               if (typec->ports[port_num]->partner)
+                       return;
+
+               pd_en = resp->enabled & PD_CTRL_RESP_ENABLED_PD_CAPABLE;
+               ret = cros_typec_add_partner(typec, port_num, pd_en);
+               if (ret)
+                       dev_warn(typec->dev,
+                                "Failed to register partner on port: %d\n",
+                                port_num);
+       } else {
+               if (!typec->ports[port_num]->partner)
+                       return;
+               cros_typec_remove_partner(typec, port_num);
        }
+}
+
+static int cros_typec_get_mux_info(struct cros_typec_data *typec, int port_num,
+                                  struct ec_response_usb_pd_mux_info *resp)
+{
+       struct ec_params_usb_pd_mux_info req = {
+               .port = port_num,
+       };
+
+       return cros_typec_ec_command(typec, 0, EC_CMD_USB_PD_MUX_INFO, &req,
+                                    sizeof(req), resp, sizeof(*resp));
+}
+
+static int cros_typec_register_altmodes(struct cros_typec_data *typec, int port_num)
+{
+       struct cros_typec_port *port = typec->ports[port_num];
+       struct ec_response_typec_discovery *sop_disc = port->sop_disc;
+       struct cros_typec_altmode_node *node;
+       struct typec_altmode_desc desc;
+       struct typec_altmode *amode;
+       int ret = 0;
+       int i, j;
+
+       for (i = 0; i < sop_disc->svid_count; i++) {
+               for (j = 0; j < sop_disc->svids[i].mode_count; j++) {
+                       memset(&desc, 0, sizeof(desc));
+                       desc.svid = sop_disc->svids[i].svid;
+                       desc.mode = j;
+                       desc.vdo = sop_disc->svids[i].mode_vdo[j];
+
+                       amode = typec_partner_register_altmode(port->partner, &desc);
+                       if (IS_ERR(amode)) {
+                               ret = PTR_ERR(amode);
+                               goto err_cleanup;
+                       }
+
+                       /* If no memory is available we should unregister and exit. */
+                       node = devm_kzalloc(typec->dev, sizeof(*node), GFP_KERNEL);
+                       if (!node) {
+                               ret = -ENOMEM;
+                               typec_unregister_altmode(amode);
+                               goto err_cleanup;
+                       }
+
+                       node->amode = amode;
+                       list_add_tail(&node->list, &port->partner_mode_list);
+               }
+       }
+
+       return 0;
 
+err_cleanup:
+       cros_typec_unregister_altmodes(typec, port_num);
        return ret;
 }
 
+static int cros_typec_handle_sop_disc(struct cros_typec_data *typec, int port_num)
+{
+       struct cros_typec_port *port = typec->ports[port_num];
+       struct ec_response_typec_discovery *sop_disc = port->sop_disc;
+       struct ec_params_typec_discovery req = {
+               .port = port_num,
+               .partner_type = TYPEC_PARTNER_SOP,
+       };
+       int ret = 0;
+       int i;
+
+       if (!port->partner) {
+               dev_err(typec->dev,
+                       "SOP Discovery received without partner registered, port: %d\n",
+                       port_num);
+               ret = -EINVAL;
+               goto disc_exit;
+       }
+
+       memset(sop_disc, 0, EC_PROTO2_MAX_RESPONSE_SIZE);
+       ret = cros_typec_ec_command(typec, 0, EC_CMD_TYPEC_DISCOVERY, &req, sizeof(req),
+                                   sop_disc, EC_PROTO2_MAX_RESPONSE_SIZE);
+       if (ret < 0) {
+               dev_err(typec->dev, "Failed to get SOP discovery data for port: %d\n", port_num);
+               goto disc_exit;
+       }
+
+       /* First, update the PD identity VDOs for the partner. */
+       if (sop_disc->identity_count > 0)
+               port->p_identity.id_header = sop_disc->discovery_vdo[0];
+       if (sop_disc->identity_count > 1)
+               port->p_identity.cert_stat = sop_disc->discovery_vdo[1];
+       if (sop_disc->identity_count > 2)
+               port->p_identity.product = sop_disc->discovery_vdo[2];
+
+       /* Copy the remaining identity VDOs, up to a maximum of 6. */
+       for (i = 3; i < sop_disc->identity_count && i < VDO_MAX_OBJECTS; i++)
+               port->p_identity.vdo[i - 3] = sop_disc->discovery_vdo[i];
+
+       ret = typec_partner_set_identity(port->partner);
+       if (ret < 0) {
+               dev_err(typec->dev, "Failed to update partner PD identity, port: %d\n", port_num);
+               goto disc_exit;
+       }
+
+       ret = cros_typec_register_altmodes(typec, port_num);
+       if (ret < 0) {
+               dev_err(typec->dev, "Failed to register partner altmodes, port: %d\n", port_num);
+               goto disc_exit;
+       }
+
+disc_exit:
+       return ret;
+}
+
+static void cros_typec_handle_status(struct cros_typec_data *typec, int port_num)
+{
+       struct ec_response_typec_status resp;
+       struct ec_params_typec_status req = {
+               .port = port_num,
+       };
+       int ret;
+
+       ret = cros_typec_ec_command(typec, 0, EC_CMD_TYPEC_STATUS, &req, sizeof(req),
+                                   &resp, sizeof(resp));
+       if (ret < 0) {
+               dev_warn(typec->dev, "EC_CMD_TYPEC_STATUS failed for port: %d\n", port_num);
+               return;
+       }
+
+       if (typec->ports[port_num]->disc_done)
+               return;
+
+       /* Handle any events appropriately. */
+       if (resp.events & PD_STATUS_EVENT_SOP_DISC_DONE) {
+               ret = cros_typec_handle_sop_disc(typec, port_num);
+               if (ret < 0) {
+                       dev_err(typec->dev, "Couldn't parse SOP Disc data, port: %d\n", port_num);
+                       return;
+               }
+
+               typec->ports[port_num]->disc_done = true;
+       }
+}
+
 static int cros_typec_port_update(struct cros_typec_data *typec, int port_num)
 {
        struct ec_params_usb_pd_control req;
@@ -608,6 +776,9 @@ static int cros_typec_port_update(struct cros_typec_data *typec, int port_num)
                cros_typec_set_port_params_v0(typec, port_num,
                        (struct ec_response_usb_pd_control *) &resp);
 
+       if (typec->typec_cmd_supported)
+               cros_typec_handle_status(typec, port_num);
+
        /* Update the switches if they exist, according to requested state */
        ret = cros_typec_get_mux_info(typec, port_num, &mux_resp);
        if (ret < 0) {
@@ -656,6 +827,23 @@ static int cros_typec_get_cmd_version(struct cros_typec_data *typec)
        return 0;
 }
 
+/* Check the EC feature flags to see if TYPEC_* commands are supported. */
+static int cros_typec_cmds_supported(struct cros_typec_data *typec)
+{
+       struct ec_response_get_features resp = {};
+       int ret;
+
+       ret = cros_typec_ec_command(typec, 0, EC_CMD_GET_FEATURES, NULL, 0,
+                                   &resp, sizeof(resp));
+       if (ret < 0) {
+               dev_warn(typec->dev,
+                        "Failed to get features, assuming typec commands unsupported.\n");
+               return 0;
+       }
+
+       return resp.flags[EC_FEATURE_TYPEC_CMD / 32] & EC_FEATURE_MASK_1(EC_FEATURE_TYPEC_CMD);
+}
+
 static void cros_typec_port_work(struct work_struct *work)
 {
        struct cros_typec_data *typec = container_of(work, struct cros_typec_data, port_work);
@@ -715,6 +903,8 @@ static int cros_typec_probe(struct platform_device *pdev)
                return ret;
        }
 
+       typec->typec_cmd_supported = !!cros_typec_cmds_supported(typec);
+
        ret = cros_typec_ec_command(typec, 0, EC_CMD_USB_PD_PORTS, NULL, 0,
                                    &resp, sizeof(resp));
        if (ret < 0)
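
The cros_ec_typec.c changes above gate the new EC_CMD_TYPEC_STATUS/EC_CMD_TYPEC_DISCOVERY handling on an EC feature probe; cros_typec_cmds_supported() tests one bit of a multi-word bitmap, with EC_FEATURE_MASK_1() selecting the bit within the second 32-bit word. The word/bit indexing, shown as a runnable sketch (the feature number is illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FEATURE_WORDS   2

    /* Test feature bit 'n' in a bitmap stored as an array of 32-bit words;
     * this mirrors flags[feature / 32] & EC_FEATURE_MASK_1(feature) for a
     * feature that lives in the second word. */
    static bool feature_supported(const uint32_t flags[FEATURE_WORDS],
                                  unsigned int n)
    {
            if (n / 32 >= FEATURE_WORDS)
                    return false;
            return flags[n / 32] & (UINT32_C(1) << (n % 32));
    }

    int main(void)
    {
            uint32_t flags[FEATURE_WORDS] = { 0, UINT32_C(1) << 6 };

            printf("%d\n", feature_supported(flags, 38));   /* 38 = 32 + 6 -> true */
            return 0;
    }
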
index d55b372..b22c4fd 100644 (file)
@@ -177,6 +177,13 @@ config POWER_RESET_QNAP
 
          Say Y if you have a QNAP NAS.
 
+config POWER_RESET_REGULATOR
+       bool "Regulator subsystem power-off driver"
+       depends on OF && REGULATOR
+       help
+         This driver supports turning off your board by disabling a
+         power regulator defined in the devicetree.
+
 config POWER_RESET_RESTART
        bool "Restart power-off driver"
        help
index c51eceb..9dc49d3 100644 (file)
@@ -19,6 +19,7 @@ obj-$(CONFIG_POWER_RESET_OCELOT_RESET) += ocelot-reset.o
 obj-$(CONFIG_POWER_RESET_PIIX4_POWEROFF) += piix4-poweroff.o
 obj-$(CONFIG_POWER_RESET_LTC2952) += ltc2952-poweroff.o
 obj-$(CONFIG_POWER_RESET_QNAP) += qnap-poweroff.o
+obj-$(CONFIG_POWER_RESET_REGULATOR) += regulator-poweroff.o
 obj-$(CONFIG_POWER_RESET_RESTART) += restart-poweroff.o
 obj-$(CONFIG_POWER_RESET_ST) += st-poweroff.o
 obj-$(CONFIG_POWER_RESET_VERSATILE) += arm-versatile-reboot.o
index f74e1db..8caa90c 100644 (file)
@@ -29,6 +29,8 @@ struct ocelot_reset_context {
        struct notifier_block restart_handler;
 };
 
+#define BIT_OFF_INVALID                                32
+
 #define SOFT_CHIP_RST BIT(0)
 
 #define ICPU_CFG_CPU_SYSTEM_CTRL_GENERAL_CTRL  0x24
@@ -50,9 +52,11 @@ static int ocelot_restart_handle(struct notifier_block *this,
                           ctx->props->vcore_protect, 0);
 
        /* Make the SI back to boot mode */
-       regmap_update_bits(ctx->cpu_ctrl, ICPU_CFG_CPU_SYSTEM_CTRL_GENERAL_CTRL,
-                          IF_SI_OWNER_MASK << if_si_owner_bit,
-                          IF_SI_OWNER_SIBM << if_si_owner_bit);
+       if (if_si_owner_bit != BIT_OFF_INVALID)
+               regmap_update_bits(ctx->cpu_ctrl,
+                                  ICPU_CFG_CPU_SYSTEM_CTRL_GENERAL_CTRL,
+                                  IF_SI_OWNER_MASK << if_si_owner_bit,
+                                  IF_SI_OWNER_SIBM << if_si_owner_bit);
 
        pr_emerg("Resetting SoC\n");
 
@@ -96,6 +100,20 @@ static int ocelot_reset_probe(struct platform_device *pdev)
        return err;
 }
 
+static const struct reset_props reset_props_jaguar2 = {
+       .syscon          = "mscc,ocelot-cpu-syscon",
+       .protect_reg     = 0x20,
+       .vcore_protect   = BIT(2),
+       .if_si_owner_bit = 6,
+};
+
+static const struct reset_props reset_props_luton = {
+       .syscon          = "mscc,ocelot-cpu-syscon",
+       .protect_reg     = 0x20,
+       .vcore_protect   = BIT(2),
+       .if_si_owner_bit = BIT_OFF_INVALID, /* n/a */
+};
+
 static const struct reset_props reset_props_ocelot = {
        .syscon          = "mscc,ocelot-cpu-syscon",
        .protect_reg     = 0x20,
@@ -112,6 +130,12 @@ static const struct reset_props reset_props_sparx5 = {
 
 static const struct of_device_id ocelot_reset_of_match[] = {
        {
+               .compatible = "mscc,jaguar2-chip-reset",
+               .data = &reset_props_jaguar2
+       }, {
+               .compatible = "mscc,luton-chip-reset",
+               .data = &reset_props_luton
+       }, {
                .compatible = "mscc,ocelot-chip-reset",
                .data = &reset_props_ocelot
        }, {
index 52b7dc6..0ddf7f2 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/serial_reg.h>
-#include <linux/kallsyms.h>
 #include <linux/of.h>
 #include <linux/io.h>
 #include <linux/clk.h>
@@ -75,7 +74,6 @@ static int qnap_power_off_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct resource *res;
        struct clk *clk;
-       char symname[KSYM_NAME_LEN];
 
        const struct of_device_id *match =
                of_match_node(qnap_power_off_of_match_table, np);
@@ -104,10 +102,8 @@ static int qnap_power_off_probe(struct platform_device *pdev)
 
        /* Check that nothing else has already setup a handler */
        if (pm_power_off) {
-               lookup_symbol_name((ulong)pm_power_off, symname);
-               dev_err(&pdev->dev,
-                       "pm_power_off already claimed %p %s",
-                       pm_power_off, symname);
+               dev_err(&pdev->dev, "pm_power_off already claimed for %ps",
+                       pm_power_off);
                return -EBUSY;
        }
        pm_power_off = qnap_power_off;
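
This driver and syscon-poweroff below replace the explicit lookup_symbol_name() call with printk's %ps extension, which resolves a function pointer to its symbol name directly. A kernel-context sketch of the idiom (not stand-alone code; %ps is a printk extension, not standard C):

    /* %ps resolves a code pointer to its symbol name (%pS would also
     * append the offset), so no kallsyms lookup or KSYM_NAME_LEN buffer
     * is needed: */
    if (pm_power_off)
            dev_err(dev, "pm_power_off already claimed for %ps",
                    pm_power_off);
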
diff --git a/drivers/power/reset/regulator-poweroff.c b/drivers/power/reset/regulator-poweroff.c
new file mode 100644 (file)
index 0000000..f697088
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Force-disables a regulator to power down a device
+ *
+ * Michael Klein <michael@fossekall.de>
+ *
+ * Copyright (C) 2020 Michael Klein
+ *
+ * Based on the gpio-poweroff driver.
+ */
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/regulator/consumer.h>
+
+#define TIMEOUT_MS 3000
+
+/*
+ * Hold the configuration here; there cannot be more than one instance of
+ * the driver, since pm_power_off itself is global.
+ */
+static struct regulator *cpu_regulator;
+
+static void regulator_poweroff_do_poweroff(void)
+{
+       if (cpu_regulator && regulator_is_enabled(cpu_regulator))
+               regulator_force_disable(cpu_regulator);
+
+       /* give it some time */
+       mdelay(TIMEOUT_MS);
+
+       WARN_ON(1);
+}
+
+static int regulator_poweroff_probe(struct platform_device *pdev)
+{
+       /* If a pm_power_off function has already been added, leave it alone */
+       if (pm_power_off != NULL) {
+               dev_err(&pdev->dev,
+                       "%s: pm_power_off function already registered\n",
+                       __func__);
+               return -EBUSY;
+       }
+
+       cpu_regulator = devm_regulator_get(&pdev->dev, "cpu");
+       if (IS_ERR(cpu_regulator))
+               return PTR_ERR(cpu_regulator);
+
+       pm_power_off = &regulator_poweroff_do_poweroff;
+       return 0;
+}
+
+static int regulator_poweroff_remove(__maybe_unused struct platform_device *pdev)
+{
+       if (pm_power_off == &regulator_poweroff_do_poweroff)
+               pm_power_off = NULL;
+
+       return 0;
+}
+
+static const struct of_device_id of_regulator_poweroff_match[] = {
+       { .compatible = "regulator-poweroff", },
+       {},
+};
+
+static struct platform_driver regulator_poweroff_driver = {
+       .probe = regulator_poweroff_probe,
+       .remove = regulator_poweroff_remove,
+       .driver = {
+               .name = "poweroff-regulator",
+               .of_match_table = of_regulator_poweroff_match,
+       },
+};
+
+module_platform_driver(regulator_poweroff_driver);
+
+MODULE_AUTHOR("Michael Klein <michael@fossekall.de>");
+MODULE_DESCRIPTION("Regulator poweroff driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:poweroff-regulator");
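
The probe/remove pair above follows the usual single-owner discipline for the global pm_power_off hook: refuse to install a handler while another owner exists, and on removal clear the pointer only if it still points at our own function. A runnable userspace model of that discipline:

    #include <errno.h>
    #include <stdio.h>

    /* Userspace model of the kernel's single global power-off hook. */
    static void (*pm_power_off)(void);

    static void my_poweroff(void) { /* ... cut the power ... */ }

    static int claim_hook(void)
    {
            if (pm_power_off != NULL)
                    return -EBUSY;          /* someone else owns power-off */
            pm_power_off = my_poweroff;
            return 0;
    }

    static void release_hook(void)
    {
            if (pm_power_off == my_poweroff)
                    pm_power_off = NULL;    /* drop only our own handler */
    }

    int main(void)
    {
            printf("first claim:  %d\n", claim_hook());     /* 0 */
            printf("second claim: %d\n", claim_hook());     /* -EBUSY */
            release_hook();
            return 0;
    }
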
index 4d6923b..ed58bdf 100644 (file)
@@ -6,7 +6,6 @@
  * Author: Moritz Fischer <moritz.fischer@ettus.com>
  */
 
-#include <linux/kallsyms.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/notifier.h>
@@ -34,7 +33,6 @@ static void syscon_poweroff(void)
 
 static int syscon_poweroff_probe(struct platform_device *pdev)
 {
-       char symname[KSYM_NAME_LEN];
        int mask_err, value_err;
 
        map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "regmap");
@@ -65,10 +63,8 @@ static int syscon_poweroff_probe(struct platform_device *pdev)
        }
 
        if (pm_power_off) {
-               lookup_symbol_name((ulong)pm_power_off, symname);
-               dev_err(&pdev->dev,
-               "pm_power_off already claimed %p %s",
-               pm_power_off, symname);
+               dev_err(&pdev->dev, "pm_power_off already claimed for %ps",
+                       pm_power_off);
                return -EBUSY;
        }
 
index 909f024..d203453 100644 (file)
@@ -936,29 +936,23 @@ static struct ab8500_btemp_interrupts ab8500_btemp_irq[] = {
        {"BTEMP_MEDIUM_HIGH", ab8500_btemp_medhigh_handler},
 };
 
-#if defined(CONFIG_PM)
-static int ab8500_btemp_resume(struct platform_device *pdev)
+static int __maybe_unused ab8500_btemp_resume(struct device *dev)
 {
-       struct ab8500_btemp *di = platform_get_drvdata(pdev);
+       struct ab8500_btemp *di = dev_get_drvdata(dev);
 
        ab8500_btemp_periodic(di, true);
 
        return 0;
 }
 
-static int ab8500_btemp_suspend(struct platform_device *pdev,
-       pm_message_t state)
+static int __maybe_unused ab8500_btemp_suspend(struct device *dev)
 {
-       struct ab8500_btemp *di = platform_get_drvdata(pdev);
+       struct ab8500_btemp *di = dev_get_drvdata(dev);
 
        ab8500_btemp_periodic(di, false);
 
        return 0;
 }
-#else
-#define ab8500_btemp_suspend      NULL
-#define ab8500_btemp_resume       NULL
-#endif
 
 static int ab8500_btemp_remove(struct platform_device *pdev)
 {
@@ -999,48 +993,45 @@ static int ab8500_btemp_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct abx500_bm_data *plat = pdev->dev.platform_data;
        struct power_supply_config psy_cfg = {};
+       struct device *dev = &pdev->dev;
        struct ab8500_btemp *di;
        int irq, i, ret = 0;
        u8 val;
 
-       di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL);
-       if (!di) {
-               dev_err(&pdev->dev, "%s no mem for ab8500_btemp\n", __func__);
+       di = devm_kzalloc(dev, sizeof(*di), GFP_KERNEL);
+       if (!di)
                return -ENOMEM;
-       }
 
        if (!plat) {
-               dev_err(&pdev->dev, "no battery management data supplied\n");
+               dev_err(dev, "no battery management data supplied\n");
                return -EINVAL;
        }
        di->bm = plat;
 
        if (np) {
-               ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
+               ret = ab8500_bm_of_probe(dev, np, di->bm);
                if (ret) {
-                       dev_err(&pdev->dev, "failed to get battery information\n");
+                       dev_err(dev, "failed to get battery information\n");
                        return ret;
                }
        }
 
        /* get parent data */
-       di->dev = &pdev->dev;
+       di->dev = dev;
        di->parent = dev_get_drvdata(pdev->dev.parent);
 
        /* Get ADC channels */
-       di->btemp_ball = devm_iio_channel_get(&pdev->dev, "btemp_ball");
+       di->btemp_ball = devm_iio_channel_get(dev, "btemp_ball");
        if (IS_ERR(di->btemp_ball)) {
-               if (PTR_ERR(di->btemp_ball) == -ENODEV)
-                       return -EPROBE_DEFER;
-               dev_err(&pdev->dev, "failed to get BTEMP BALL ADC channel\n");
-               return PTR_ERR(di->btemp_ball);
+               ret = dev_err_probe(dev, PTR_ERR(di->btemp_ball),
+                                   "failed to get BTEMP BALL ADC channel\n");
+               return ret;
        }
-       di->bat_ctrl = devm_iio_channel_get(&pdev->dev, "bat_ctrl");
+       di->bat_ctrl = devm_iio_channel_get(dev, "bat_ctrl");
        if (IS_ERR(di->bat_ctrl)) {
-               if (PTR_ERR(di->bat_ctrl) == -ENODEV)
-                       return -EPROBE_DEFER;
-               dev_err(&pdev->dev, "failed to get BAT CTRL ADC channel\n");
-               return PTR_ERR(di->bat_ctrl);
+               ret = dev_err_probe(dev, PTR_ERR(di->bat_ctrl),
+                                   "failed to get BAT CTRL ADC channel\n");
+               return ret;
        }
 
        di->initialized = false;
@@ -1053,7 +1044,7 @@ static int ab8500_btemp_probe(struct platform_device *pdev)
        di->btemp_wq =
                alloc_workqueue("ab8500_btemp_wq", WQ_MEM_RECLAIM, 0);
        if (di->btemp_wq == NULL) {
-               dev_err(di->dev, "failed to create work queue\n");
+               dev_err(dev, "failed to create work queue\n");
                return -ENOMEM;
        }
 
@@ -1065,10 +1056,10 @@ static int ab8500_btemp_probe(struct platform_device *pdev)
        di->btemp_ranges.btemp_low_limit = BTEMP_THERMAL_LOW_LIMIT;
        di->btemp_ranges.btemp_med_limit = BTEMP_THERMAL_MED_LIMIT;
 
-       ret = abx500_get_register_interruptible(di->dev, AB8500_CHARGER,
+       ret = abx500_get_register_interruptible(dev, AB8500_CHARGER,
                AB8500_BTEMP_HIGH_TH, &val);
        if (ret < 0) {
-               dev_err(di->dev, "%s ab8500 read failed\n", __func__);
+               dev_err(dev, "%s ab8500 read failed\n", __func__);
                goto free_btemp_wq;
        }
        switch (val) {
@@ -1088,10 +1079,10 @@ static int ab8500_btemp_probe(struct platform_device *pdev)
        }
 
        /* Register BTEMP power supply class */
-       di->btemp_psy = power_supply_register(di->dev, &ab8500_btemp_desc,
+       di->btemp_psy = power_supply_register(dev, &ab8500_btemp_desc,
                                              &psy_cfg);
        if (IS_ERR(di->btemp_psy)) {
-               dev_err(di->dev, "failed to register BTEMP psy\n");
+               dev_err(dev, "failed to register BTEMP psy\n");
                ret = PTR_ERR(di->btemp_psy);
                goto free_btemp_wq;
        }
@@ -1105,15 +1096,15 @@ static int ab8500_btemp_probe(struct platform_device *pdev)
                }
 
                ret = request_threaded_irq(irq, NULL, ab8500_btemp_irq[i].isr,
-                       IRQF_SHARED | IRQF_NO_SUSPEND,
+                       IRQF_SHARED | IRQF_NO_SUSPEND | IRQF_ONESHOT,
                        ab8500_btemp_irq[i].name, di);
 
                if (ret) {
-                       dev_err(di->dev, "failed to request %s IRQ %d: %d\n"
+                       dev_err(dev, "failed to request %s IRQ %d: %d\n"
                                , ab8500_btemp_irq[i].name, irq, ret);
                        goto free_irq;
                }
-               dev_dbg(di->dev, "Requested %s IRQ %d: %d\n",
+               dev_dbg(dev, "Requested %s IRQ %d: %d\n",
                        ab8500_btemp_irq[i].name, irq, ret);
        }
 
@@ -1138,6 +1129,8 @@ free_btemp_wq:
        return ret;
 }
 
+static SIMPLE_DEV_PM_OPS(ab8500_btemp_pm_ops, ab8500_btemp_suspend, ab8500_btemp_resume);
+
 static const struct of_device_id ab8500_btemp_match[] = {
        { .compatible = "stericsson,ab8500-btemp", },
        { },
@@ -1146,11 +1139,10 @@ static const struct of_device_id ab8500_btemp_match[] = {
 static struct platform_driver ab8500_btemp_driver = {
        .probe = ab8500_btemp_probe,
        .remove = ab8500_btemp_remove,
-       .suspend = ab8500_btemp_suspend,
-       .resume = ab8500_btemp_resume,
        .driver = {
                .name = "ab8500-btemp",
                .of_match_table = ab8500_btemp_match,
+               .pm = &ab8500_btemp_pm_ops,
        },
 };
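
The btemp hunks above collapse the open-coded "-ENODEV means defer" dance into dev_err_probe(); note the helper returns the error it was given unchanged, rather than remapping -ENODEV to -EPROBE_DEFER as the deleted code did. A minimal sketch of the pattern, with a hypothetical function and channel name:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/iio/consumer.h>

/* Sketch only; example_get_channel() and "example_chan" are made up. */
static int example_get_channel(struct device *dev, struct iio_channel **chan)
{
	*chan = devm_iio_channel_get(dev, "example_chan");
	if (IS_ERR(*chan))
		/*
		 * dev_err_probe() logs via dev_err() for real errors but
		 * only dev_dbg() for -EPROBE_DEFER, then returns the
		 * error code unchanged.
		 */
		return dev_err_probe(dev, PTR_ERR(*chan),
				     "failed to get example ADC channel\n");
	return 0;
}
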
 
index db65be0..ac77c88 100644 (file)
@@ -3209,11 +3209,10 @@ static int ab8500_charger_usb_notifier_call(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
-#if defined(CONFIG_PM)
-static int ab8500_charger_resume(struct platform_device *pdev)
+static int __maybe_unused ab8500_charger_resume(struct device *dev)
 {
        int ret;
-       struct ab8500_charger *di = platform_get_drvdata(pdev);
+       struct ab8500_charger *di = dev_get_drvdata(dev);
 
        /*
         * For ABB revision 1.0 and 1.1 there is a bug in the watchdog
@@ -3247,10 +3246,9 @@ static int ab8500_charger_resume(struct platform_device *pdev)
        return 0;
 }
 
-static int ab8500_charger_suspend(struct platform_device *pdev,
-       pm_message_t state)
+static int __maybe_unused ab8500_charger_suspend(struct device *dev)
 {
-       struct ab8500_charger *di = platform_get_drvdata(pdev);
+       struct ab8500_charger *di = dev_get_drvdata(dev);
 
        /* Cancel any pending jobs */
        cancel_delayed_work(&di->check_hw_failure_work);
@@ -3272,10 +3270,6 @@ static int ab8500_charger_suspend(struct platform_device *pdev,
 
        return 0;
 }
-#else
-#define ab8500_charger_suspend      NULL
-#define ab8500_charger_resume       NULL
-#endif
 
 static struct notifier_block charger_nb = {
        .notifier_call = ab8500_external_charger_prepare,
@@ -3354,23 +3348,22 @@ static int ab8500_charger_probe(struct platform_device *pdev)
        struct power_supply_config ac_psy_cfg = {}, usb_psy_cfg = {};
        struct ab8500_charger *di;
        int irq, i, charger_status, ret = 0, ch_stat;
+       struct device *dev = &pdev->dev;
 
-       di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL);
-       if (!di) {
-               dev_err(&pdev->dev, "%s no mem for ab8500_charger\n", __func__);
+       di = devm_kzalloc(dev, sizeof(*di), GFP_KERNEL);
+       if (!di)
                return -ENOMEM;
-       }
 
        if (!plat) {
-               dev_err(&pdev->dev, "no battery management data supplied\n");
+               dev_err(dev, "no battery management data supplied\n");
                return -EINVAL;
        }
        di->bm = plat;
 
        if (np) {
-               ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
+               ret = ab8500_bm_of_probe(dev, np, di->bm);
                if (ret) {
-                       dev_err(&pdev->dev, "failed to get battery information\n");
+                       dev_err(dev, "failed to get battery information\n");
                        return ret;
                }
                di->autopower_cfg = of_property_read_bool(np, "autopower_cfg");
@@ -3378,40 +3371,33 @@ static int ab8500_charger_probe(struct platform_device *pdev)
                di->autopower_cfg = false;
 
        /* get parent data */
-       di->dev = &pdev->dev;
+       di->dev = dev;
        di->parent = dev_get_drvdata(pdev->dev.parent);
 
        /* Get ADC channels */
-       di->adc_main_charger_v = devm_iio_channel_get(&pdev->dev,
-                                                     "main_charger_v");
+       di->adc_main_charger_v = devm_iio_channel_get(dev, "main_charger_v");
        if (IS_ERR(di->adc_main_charger_v)) {
-               if (PTR_ERR(di->adc_main_charger_v) == -ENODEV)
-                       return -EPROBE_DEFER;
-               dev_err(&pdev->dev, "failed to get ADC main charger voltage\n");
-               return PTR_ERR(di->adc_main_charger_v);
+               ret = dev_err_probe(dev, PTR_ERR(di->adc_main_charger_v),
+                                   "failed to get ADC main charger voltage\n");
+               return ret;
        }
-       di->adc_main_charger_c = devm_iio_channel_get(&pdev->dev,
-                                                     "main_charger_c");
+       di->adc_main_charger_c = devm_iio_channel_get(dev, "main_charger_c");
        if (IS_ERR(di->adc_main_charger_c)) {
-               if (PTR_ERR(di->adc_main_charger_c) == -ENODEV)
-                       return -EPROBE_DEFER;
-               dev_err(&pdev->dev, "failed to get ADC main charger current\n");
-               return PTR_ERR(di->adc_main_charger_c);
+               ret = dev_err_probe(dev, PTR_ERR(di->adc_main_charger_c),
+                                   "failed to get ADC main charger current\n");
+               return ret;
        }
-       di->adc_vbus_v = devm_iio_channel_get(&pdev->dev, "vbus_v");
+       di->adc_vbus_v = devm_iio_channel_get(dev, "vbus_v");
        if (IS_ERR(di->adc_vbus_v)) {
-               if (PTR_ERR(di->adc_vbus_v) == -ENODEV)
-                       return -EPROBE_DEFER;
-               dev_err(&pdev->dev, "failed to get ADC USB charger voltage\n");
-               return PTR_ERR(di->adc_vbus_v);
+               ret = dev_err_probe(dev, PTR_ERR(di->adc_vbus_v),
+                                   "failed to get ADC USB charger voltage\n");
+               return ret;
        }
-       di->adc_usb_charger_c = devm_iio_channel_get(&pdev->dev,
-                                                    "usb_charger_c");
+       di->adc_usb_charger_c = devm_iio_channel_get(dev, "usb_charger_c");
        if (IS_ERR(di->adc_usb_charger_c)) {
-               if (PTR_ERR(di->adc_usb_charger_c) == -ENODEV)
-                       return -EPROBE_DEFER;
-               dev_err(&pdev->dev, "failed to get ADC USB charger current\n");
-               return PTR_ERR(di->adc_usb_charger_c);
+               ret = dev_err_probe(dev, PTR_ERR(di->adc_usb_charger_c),
+                                   "failed to get ADC USB charger current\n");
+               return ret;
        }
 
        /* initialize lock */
@@ -3467,7 +3453,7 @@ static int ab8500_charger_probe(struct platform_device *pdev)
        di->charger_wq = alloc_ordered_workqueue("ab8500_charger_wq",
                                                 WQ_MEM_RECLAIM);
        if (di->charger_wq == NULL) {
-               dev_err(di->dev, "failed to create work queue\n");
+               dev_err(dev, "failed to create work queue\n");
                return -ENOMEM;
        }
 
@@ -3526,10 +3512,10 @@ static int ab8500_charger_probe(struct platform_device *pdev)
         * is a charger connected to avoid erroneous BTEMP_HIGH/LOW
         * interrupts during charging
         */
-       di->regu = devm_regulator_get(di->dev, "vddadc");
+       di->regu = devm_regulator_get(dev, "vddadc");
        if (IS_ERR(di->regu)) {
                ret = PTR_ERR(di->regu);
-               dev_err(di->dev, "failed to get vddadc regulator\n");
+               dev_err(dev, "failed to get vddadc regulator\n");
                goto free_charger_wq;
        }
 
@@ -3537,17 +3523,17 @@ static int ab8500_charger_probe(struct platform_device *pdev)
        /* Initialize OVV, and other registers */
        ret = ab8500_charger_init_hw_registers(di);
        if (ret) {
-               dev_err(di->dev, "failed to initialize ABB registers\n");
+               dev_err(dev, "failed to initialize ABB registers\n");
                goto free_charger_wq;
        }
 
        /* Register AC charger class */
        if (di->ac_chg.enabled) {
-               di->ac_chg.psy = power_supply_register(di->dev,
+               di->ac_chg.psy = power_supply_register(dev,
                                                       &ab8500_ac_chg_desc,
                                                       &ac_psy_cfg);
                if (IS_ERR(di->ac_chg.psy)) {
-                       dev_err(di->dev, "failed to register AC charger\n");
+                       dev_err(dev, "failed to register AC charger\n");
                        ret = PTR_ERR(di->ac_chg.psy);
                        goto free_charger_wq;
                }
@@ -3555,11 +3541,11 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 
        /* Register USB charger class */
        if (di->usb_chg.enabled) {
-               di->usb_chg.psy = power_supply_register(di->dev,
+               di->usb_chg.psy = power_supply_register(dev,
                                                        &ab8500_usb_chg_desc,
                                                        &usb_psy_cfg);
                if (IS_ERR(di->usb_chg.psy)) {
-                       dev_err(di->dev, "failed to register USB charger\n");
+                       dev_err(dev, "failed to register USB charger\n");
                        ret = PTR_ERR(di->usb_chg.psy);
                        goto free_ac;
                }
@@ -3567,14 +3553,14 @@ static int ab8500_charger_probe(struct platform_device *pdev)
 
        di->usb_phy = usb_get_phy(USB_PHY_TYPE_USB2);
        if (IS_ERR_OR_NULL(di->usb_phy)) {
-               dev_err(di->dev, "failed to get usb transceiver\n");
+               dev_err(dev, "failed to get usb transceiver\n");
                ret = -EINVAL;
                goto free_usb;
        }
        di->nb.notifier_call = ab8500_charger_usb_notifier_call;
        ret = usb_register_notifier(di->usb_phy, &di->nb);
        if (ret) {
-               dev_err(di->dev, "failed to register usb notifier\n");
+               dev_err(dev, "failed to register usb notifier\n");
                goto put_usb_phy;
        }
 
@@ -3603,15 +3589,15 @@ static int ab8500_charger_probe(struct platform_device *pdev)
                }
 
                ret = request_threaded_irq(irq, NULL, ab8500_charger_irq[i].isr,
-                       IRQF_SHARED | IRQF_NO_SUSPEND,
+                       IRQF_SHARED | IRQF_NO_SUSPEND | IRQF_ONESHOT,
                        ab8500_charger_irq[i].name, di);
 
                if (ret != 0) {
-                       dev_err(di->dev, "failed to request %s IRQ %d: %d\n"
+                       dev_err(dev, "failed to request %s IRQ %d: %d\n"
                                , ab8500_charger_irq[i].name, irq, ret);
                        goto free_irq;
                }
-               dev_dbg(di->dev, "Requested %s IRQ %d: %d\n",
+               dev_dbg(dev, "Requested %s IRQ %d: %d\n",
                        ab8500_charger_irq[i].name, irq, ret);
        }
 
@@ -3659,6 +3645,8 @@ free_charger_wq:
        return ret;
 }
 
+static SIMPLE_DEV_PM_OPS(ab8500_charger_pm_ops, ab8500_charger_suspend, ab8500_charger_resume);
+
 static const struct of_device_id ab8500_charger_match[] = {
        { .compatible = "stericsson,ab8500-charger", },
        { },
@@ -3667,11 +3655,10 @@ static const struct of_device_id ab8500_charger_match[] = {
 static struct platform_driver ab8500_charger_driver = {
        .probe = ab8500_charger_probe,
        .remove = ab8500_charger_remove,
-       .suspend = ab8500_charger_suspend,
-       .resume = ab8500_charger_resume,
        .driver = {
                .name = "ab8500-charger",
                .of_match_table = ab8500_charger_match,
+               .pm = &ab8500_charger_pm_ops,
        },
 };
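
All four ab8500/abx500 drivers in this series make the same conversion from the legacy platform-bus .suspend/.resume hooks to dev_pm_ops. A sketch of the resulting shape, using hypothetical example_* names:

#include <linux/platform_device.h>
#include <linux/pm.h>

static int __maybe_unused example_suspend(struct device *dev)
{
	/* dev_get_drvdata(dev) replaces platform_get_drvdata(pdev) here */
	return 0;
}

static int __maybe_unused example_resume(struct device *dev)
{
	return 0;
}

/*
 * Expands to a struct dev_pm_ops whose sleep callbacks are filled in
 * only under CONFIG_PM_SLEEP; __maybe_unused then replaces the deleted
 * "#if defined(CONFIG_PM)" guard without tripping -Wunused-function.
 */
static SIMPLE_DEV_PM_OPS(example_pm_ops, example_suspend, example_resume);

static struct platform_driver example_driver = {
	.driver = {
		.name = "example",
		.pm = &example_pm_ops,
	},
};
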
 
index 592a73d..3873e48 100644 (file)
@@ -2942,10 +2942,9 @@ static void ab8500_fg_sysfs_psy_remove_attrs(struct ab8500_fg *di)
 
 /* Exposure to the sysfs interface <<END>> */
 
-#if defined(CONFIG_PM)
-static int ab8500_fg_resume(struct platform_device *pdev)
+static int __maybe_unused ab8500_fg_resume(struct device *dev)
 {
-       struct ab8500_fg *di = platform_get_drvdata(pdev);
+       struct ab8500_fg *di = dev_get_drvdata(dev);
 
        /*
         * Change state if we're not charging. If we're charging we will wake
@@ -2959,10 +2958,9 @@ static int ab8500_fg_resume(struct platform_device *pdev)
        return 0;
 }
 
-static int ab8500_fg_suspend(struct platform_device *pdev,
-       pm_message_t state)
+static int __maybe_unused ab8500_fg_suspend(struct device *dev)
 {
-       struct ab8500_fg *di = platform_get_drvdata(pdev);
+       struct ab8500_fg *di = dev_get_drvdata(dev);
 
        flush_delayed_work(&di->fg_periodic_work);
        flush_work(&di->fg_work);
@@ -2980,10 +2978,6 @@ static int ab8500_fg_suspend(struct platform_device *pdev,
 
        return 0;
 }
-#else
-#define ab8500_fg_suspend      NULL
-#define ab8500_fg_resume       NULL
-#endif
 
 static int ab8500_fg_remove(struct platform_device *pdev)
 {
@@ -3007,14 +3001,11 @@ static int ab8500_fg_remove(struct platform_device *pdev)
 }
 
 /* ab8500 fg driver interrupts and their respective isr */
-static struct ab8500_fg_interrupts ab8500_fg_irq_th[] = {
+static struct ab8500_fg_interrupts ab8500_fg_irq[] = {
        {"NCONV_ACCU", ab8500_fg_cc_convend_handler},
        {"BATT_OVV", ab8500_fg_batt_ovv_handler},
        {"LOW_BAT_F", ab8500_fg_lowbatf_handler},
        {"CC_INT_CALIB", ab8500_fg_cc_int_calib_handler},
-};
-
-static struct ab8500_fg_interrupts ab8500_fg_irq_bh[] = {
        {"CCEOC", ab8500_fg_cc_data_end_handler},
 };
 
@@ -3037,26 +3028,25 @@ static int ab8500_fg_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct abx500_bm_data *plat = pdev->dev.platform_data;
        struct power_supply_config psy_cfg = {};
+       struct device *dev = &pdev->dev;
        struct ab8500_fg *di;
        int i, irq;
        int ret = 0;
 
-       di = devm_kzalloc(&pdev->dev, sizeof(*di), GFP_KERNEL);
-       if (!di) {
-               dev_err(&pdev->dev, "%s no mem for ab8500_fg\n", __func__);
+       di = devm_kzalloc(dev, sizeof(*di), GFP_KERNEL);
+       if (!di)
                return -ENOMEM;
-       }
 
        if (!plat) {
-               dev_err(&pdev->dev, "no battery management data supplied\n");
+               dev_err(dev, "no battery management data supplied\n");
                return -EINVAL;
        }
        di->bm = plat;
 
        if (np) {
-               ret = ab8500_bm_of_probe(&pdev->dev, np, di->bm);
+               ret = ab8500_bm_of_probe(dev, np, di->bm);
                if (ret) {
-                       dev_err(&pdev->dev, "failed to get battery information\n");
+                       dev_err(dev, "failed to get battery information\n");
                        return ret;
                }
        }
@@ -3064,15 +3054,14 @@ static int ab8500_fg_probe(struct platform_device *pdev)
        mutex_init(&di->cc_lock);
 
        /* get parent data */
-       di->dev = &pdev->dev;
+       di->dev = dev;
        di->parent = dev_get_drvdata(pdev->dev.parent);
 
-       di->main_bat_v = devm_iio_channel_get(&pdev->dev, "main_bat_v");
+       di->main_bat_v = devm_iio_channel_get(dev, "main_bat_v");
        if (IS_ERR(di->main_bat_v)) {
-               if (PTR_ERR(di->main_bat_v) == -ENODEV)
-                       return -EPROBE_DEFER;
-               dev_err(&pdev->dev, "failed to get main battery ADC channel\n");
-               return PTR_ERR(di->main_bat_v);
+               ret = dev_err_probe(dev, PTR_ERR(di->main_bat_v),
+                                   "failed to get main battery ADC channel\n");
+               return ret;
        }
 
        psy_cfg.supplied_to = supply_interface;
@@ -3094,7 +3083,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
        /* Create a work queue for running the FG algorithm */
        di->fg_wq = alloc_ordered_workqueue("ab8500_fg_wq", WQ_MEM_RECLAIM);
        if (di->fg_wq == NULL) {
-               dev_err(di->dev, "failed to create work queue\n");
+               dev_err(dev, "failed to create work queue\n");
                return -ENOMEM;
        }
 
@@ -3129,7 +3118,7 @@ static int ab8500_fg_probe(struct platform_device *pdev)
        /* Initialize OVV, and other registers */
        ret = ab8500_fg_init_hw_registers(di);
        if (ret) {
-               dev_err(di->dev, "failed to initialize registers\n");
+               dev_err(dev, "failed to initialize registers\n");
                goto free_inst_curr_wq;
        }
 
@@ -3138,9 +3127,9 @@ static int ab8500_fg_probe(struct platform_device *pdev)
        di->flags.batt_id_received = false;
 
        /* Register FG power supply class */
-       di->fg_psy = power_supply_register(di->dev, &ab8500_fg_desc, &psy_cfg);
+       di->fg_psy = power_supply_register(dev, &ab8500_fg_desc, &psy_cfg);
        if (IS_ERR(di->fg_psy)) {
-               dev_err(di->dev, "failed to register FG psy\n");
+               dev_err(dev, "failed to register FG psy\n");
                ret = PTR_ERR(di->fg_psy);
                goto free_inst_curr_wq;
        }
@@ -3156,45 +3145,26 @@ static int ab8500_fg_probe(struct platform_device *pdev)
        init_completion(&di->ab8500_fg_complete);
 
        /* Register primary interrupt handlers */
-       for (i = 0; i < ARRAY_SIZE(ab8500_fg_irq_th); i++) {
-               irq = platform_get_irq_byname(pdev, ab8500_fg_irq_th[i].name);
+       for (i = 0; i < ARRAY_SIZE(ab8500_fg_irq); i++) {
+               irq = platform_get_irq_byname(pdev, ab8500_fg_irq[i].name);
                if (irq < 0) {
                        ret = irq;
-                       goto free_irq_th;
+                       goto free_irq;
                }
 
-               ret = request_irq(irq, ab8500_fg_irq_th[i].isr,
-                                 IRQF_SHARED | IRQF_NO_SUSPEND,
-                                 ab8500_fg_irq_th[i].name, di);
+               ret = request_threaded_irq(irq, NULL, ab8500_fg_irq[i].isr,
+                                 IRQF_SHARED | IRQF_NO_SUSPEND | IRQF_ONESHOT,
+                                 ab8500_fg_irq[i].name, di);
 
                if (ret != 0) {
-                       dev_err(di->dev, "failed to request %s IRQ %d: %d\n",
-                               ab8500_fg_irq_th[i].name, irq, ret);
-                       goto free_irq_th;
+                       dev_err(dev, "failed to request %s IRQ %d: %d\n",
+                               ab8500_fg_irq[i].name, irq, ret);
+                       goto free_irq;
                }
-               dev_dbg(di->dev, "Requested %s IRQ %d: %d\n",
-                       ab8500_fg_irq_th[i].name, irq, ret);
+               dev_dbg(dev, "Requested %s IRQ %d: %d\n",
+                       ab8500_fg_irq[i].name, irq, ret);
        }
 
-       /* Register threaded interrupt handler */
-       irq = platform_get_irq_byname(pdev, ab8500_fg_irq_bh[0].name);
-       if (irq < 0) {
-               ret = irq;
-               goto free_irq_th;
-       }
-
-       ret = request_threaded_irq(irq, NULL, ab8500_fg_irq_bh[0].isr,
-                               IRQF_SHARED | IRQF_NO_SUSPEND | IRQF_ONESHOT,
-                       ab8500_fg_irq_bh[0].name, di);
-
-       if (ret != 0) {
-               dev_err(di->dev, "failed to request %s IRQ %d: %d\n",
-                       ab8500_fg_irq_bh[0].name, irq, ret);
-               goto free_irq_th;
-       }
-       dev_dbg(di->dev, "Requested %s IRQ %d: %d\n",
-               ab8500_fg_irq_bh[0].name, irq, ret);
-
        di->irq = platform_get_irq_byname(pdev, "CCEOC");
        disable_irq(di->irq);
        di->nbr_cceoc_irq_cnt = 0;
@@ -3203,13 +3173,13 @@ static int ab8500_fg_probe(struct platform_device *pdev)
 
        ret = ab8500_fg_sysfs_init(di);
        if (ret) {
-               dev_err(di->dev, "failed to create sysfs entry\n");
+               dev_err(dev, "failed to create sysfs entry\n");
                goto free_irq;
        }
 
        ret = ab8500_fg_sysfs_psy_create_attrs(di);
        if (ret) {
-               dev_err(di->dev, "failed to create FG psy\n");
+               dev_err(dev, "failed to create FG psy\n");
                ab8500_fg_sysfs_exit(di);
                goto free_irq;
        }
@@ -3230,12 +3200,9 @@ static int ab8500_fg_probe(struct platform_device *pdev)
 
 free_irq:
        /* We also have to free all registered irqs */
-       irq = platform_get_irq_byname(pdev, ab8500_fg_irq_bh[0].name);
-       free_irq(irq, di);
-free_irq_th:
        while (--i >= 0) {
                /* Last assignment of i from primary interrupt handlers */
-               irq = platform_get_irq_byname(pdev, ab8500_fg_irq_th[i].name);
+               irq = platform_get_irq_byname(pdev, ab8500_fg_irq[i].name);
                free_irq(irq, di);
        }
 
@@ -3245,6 +3212,8 @@ free_inst_curr_wq:
        return ret;
 }
 
+static SIMPLE_DEV_PM_OPS(ab8500_fg_pm_ops, ab8500_fg_suspend, ab8500_fg_resume);
+
 static const struct of_device_id ab8500_fg_match[] = {
        { .compatible = "stericsson,ab8500-fg", },
        { },
@@ -3253,11 +3222,10 @@ static const struct of_device_id ab8500_fg_match[] = {
 static struct platform_driver ab8500_fg_driver = {
        .probe = ab8500_fg_probe,
        .remove = ab8500_fg_remove,
-       .suspend = ab8500_fg_suspend,
-       .resume = ab8500_fg_resume,
        .driver = {
                .name = "ab8500-fg",
                .of_match_table = ab8500_fg_match,
+               .pm = &ab8500_fg_pm_ops,
        },
 };
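
The fg driver can fold its separate top-half and bottom-half IRQ tables into one because every handler is now requested with request_threaded_irq(). A sketch of why IRQF_ONESHOT must join the flags, again with hypothetical example_* names:

#include <linux/interrupt.h>

static irqreturn_t example_isr(int irq, void *data)
{
	/* runs in a dedicated kernel thread, so it may sleep (I2C etc.) */
	return IRQ_HANDLED;
}

/*
 * With a NULL hard-IRQ handler the core installs its default primary
 * handler, and IRQF_ONESHOT keeps the line masked until the thread
 * finishes -- genirq rejects thread-only handlers without it.
 */
static int example_request(int irq, void *data)
{
	return request_threaded_irq(irq, NULL, example_isr,
				    IRQF_SHARED | IRQF_NO_SUSPEND | IRQF_ONESHOT,
				    "example-irq", data);
}
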
 
index 175c4f3..a9d84d8 100644 (file)
@@ -1913,10 +1913,9 @@ static int abx500_chargalg_sysfs_init(struct abx500_chargalg *di)
 }
 /* Exposure to the sysfs interface <<END>> */
 
-#if defined(CONFIG_PM)
-static int abx500_chargalg_resume(struct platform_device *pdev)
+static int __maybe_unused abx500_chargalg_resume(struct device *dev)
 {
-       struct abx500_chargalg *di = platform_get_drvdata(pdev);
+       struct abx500_chargalg *di = dev_get_drvdata(dev);
 
        /* Kick charger watchdog if charging (any charger online) */
        if (di->chg_info.online_chg)
@@ -1931,10 +1930,9 @@ static int abx500_chargalg_resume(struct platform_device *pdev)
        return 0;
 }
 
-static int abx500_chargalg_suspend(struct platform_device *pdev,
-       pm_message_t state)
+static int __maybe_unused abx500_chargalg_suspend(struct device *dev)
 {
-       struct abx500_chargalg *di = platform_get_drvdata(pdev);
+       struct abx500_chargalg *di = dev_get_drvdata(dev);
 
        if (di->chg_info.online_chg)
                cancel_delayed_work_sync(&di->chargalg_wd_work);
@@ -1943,10 +1941,6 @@ static int abx500_chargalg_suspend(struct platform_device *pdev,
 
        return 0;
 }
-#else
-#define abx500_chargalg_suspend      NULL
-#define abx500_chargalg_resume       NULL
-#endif
 
 static int abx500_chargalg_remove(struct platform_device *pdev)
 {
@@ -2080,6 +2074,8 @@ free_chargalg_wq:
        return ret;
 }
 
+static SIMPLE_DEV_PM_OPS(abx500_chargalg_pm_ops, abx500_chargalg_suspend, abx500_chargalg_resume);
+
 static const struct of_device_id ab8500_chargalg_match[] = {
        { .compatible = "stericsson,ab8500-chargalg", },
        { },
@@ -2088,11 +2084,10 @@ static const struct of_device_id ab8500_chargalg_match[] = {
 static struct platform_driver abx500_chargalg_driver = {
        .probe = abx500_chargalg_probe,
        .remove = abx500_chargalg_remove,
-       .suspend = abx500_chargalg_suspend,
-       .resume = abx500_chargalg_resume,
        .driver = {
                .name = "ab8500-chargalg",
                .of_match_table = ab8500_chargalg_match,
+               .pm = &abx500_chargalg_pm_ops,
        },
 };
 
index 0eaa86c..70b28b6 100644 (file)
@@ -92,7 +92,7 @@ static irqreturn_t axp20x_usb_power_irq(int irq, void *devid)
 
        power_supply_changed(power->supply);
 
-       mod_delayed_work(system_wq, &power->vbus_detect, DEBOUNCE_TIME);
+       mod_delayed_work(system_power_efficient_wq, &power->vbus_detect, DEBOUNCE_TIME);
 
        return IRQ_HANDLED;
 }
@@ -117,7 +117,7 @@ static void axp20x_usb_power_poll_vbus(struct work_struct *work)
 
 out:
        if (axp20x_usb_vbus_needs_polling(power))
-               mod_delayed_work(system_wq, &power->vbus_detect, DEBOUNCE_TIME);
+               mod_delayed_work(system_power_efficient_wq, &power->vbus_detect, DEBOUNCE_TIME);
 }
 
 static int axp20x_get_current_max(struct axp20x_usb_power *power, int *val)
@@ -397,7 +397,7 @@ static int axp20x_usb_power_prop_writeable(struct power_supply *psy,
        struct axp20x_usb_power *power = power_supply_get_drvdata(psy);
 
        /*
-        * The VBUS path select flag works differently on on AXP288 and newer:
+        * The VBUS path select flag works differently on AXP288 and newer:
         *  - On AXP20x and AXP22x, the flag enables VBUS (ignoring N_VBUSEN).
         *  - On AXP288 and AXP8xx, the flag disables VBUS (ignoring N_VBUSEN).
         * We only expose the control on variants where it can be used to force
@@ -525,7 +525,7 @@ static int axp20x_usb_power_resume(struct device *dev)
        while (i < power->num_irqs)
                enable_irq(power->irqs[i++]);
 
-       mod_delayed_work(system_wq, &power->vbus_detect, DEBOUNCE_TIME);
+       mod_delayed_work(system_power_efficient_wq, &power->vbus_detect, DEBOUNCE_TIME);
 
        return 0;
 }
@@ -647,7 +647,7 @@ static int axp20x_usb_power_probe(struct platform_device *pdev)
 
        INIT_DELAYED_WORK(&power->vbus_detect, axp20x_usb_power_poll_vbus);
        if (axp20x_usb_vbus_needs_polling(power))
-               queue_delayed_work(system_wq, &power->vbus_detect, 0);
+               queue_delayed_work(system_power_efficient_wq, &power->vbus_detect, 0);
 
        return 0;
 }
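
The axp20x hunks move the VBUS polling work from system_wq to system_power_efficient_wq. A sketch of the queueing call and the rationale, with hypothetical names:

#include <linux/jiffies.h>
#include <linux/workqueue.h>

static void example_poll_fn(struct work_struct *work)
{
	/* poll VBUS state, then re-queue as needed */
}
static DECLARE_DELAYED_WORK(example_poll, example_poll_fn);

/*
 * system_wq work runs on the CPU that queued it; work queued on
 * system_power_efficient_wq is treated as unbound when
 * CONFIG_WQ_POWER_EFFICIENT is in effect, so the scheduler may batch
 * it onto an already-awake core instead of waking an idle one. For a
 * low-rate debounce poll the added scheduling latency is harmless.
 */
static void example_kick_poll(void)
{
	mod_delayed_work(system_power_efficient_wq, &example_poll,
			 msecs_to_jiffies(100));
}
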
index 9d981b7..a4df1ea 100644 (file)
@@ -548,14 +548,15 @@ out:
 
 /*
  * The HP Pavilion x2 10 series comes in a number of variants:
- * Bay Trail SoC    + AXP288 PMIC, DMI_BOARD_NAME: "815D"
- * Cherry Trail SoC + AXP288 PMIC, DMI_BOARD_NAME: "813E"
- * Cherry Trail SoC + TI PMIC,     DMI_BOARD_NAME: "827C" or "82F4"
+ * Bay Trail SoC    + AXP288 PMIC, Micro-USB, DMI_BOARD_NAME: "8021"
+ * Bay Trail SoC    + AXP288 PMIC, Type-C,    DMI_BOARD_NAME: "815D"
+ * Cherry Trail SoC + AXP288 PMIC, Type-C,    DMI_BOARD_NAME: "813E"
+ * Cherry Trail SoC + TI PMIC,     Type-C,    DMI_BOARD_NAME: "827C" or "82F4"
  *
- * The variants with the AXP288 PMIC are all kinds of special:
+ * The variants with the AXP288 + Type-C connector are all kinds of special:
  *
- * 1. All variants use a Type-C connector which the AXP288 does not support, so
- * when using a Type-C charger it is not recognized. Unlike most AXP288 devices,
+ * 1. They use a Type-C connector which the AXP288 does not support, so when
+ * using a Type-C charger it is not recognized. Unlike most AXP288 devices,
  * this model actually has mostly working ACPI AC / Battery code, the ACPI code
  * "solves" this by simply setting the input_current_limit to 3A.
  * There are still some issues with the ACPI code, so we use this native driver,
@@ -578,12 +579,17 @@ out:
  */
 static const struct dmi_system_id axp288_hp_x2_dmi_ids[] = {
        {
-               /*
-                * Bay Trail model has "Hewlett-Packard" as sys_vendor, Cherry
-                * Trail model has "HP", so we only match on product_name.
-                */
                .matches = {
-                       DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+                       DMI_EXACT_MATCH(DMI_BOARD_NAME, "815D"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "HP"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+                       DMI_EXACT_MATCH(DMI_BOARD_NAME, "813E"),
                },
        },
        {} /* Terminating entry */
index d141865..4841e14 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/workqueue.h>
-#include <linux/gpio.h>
 #include <linux/i2c.h>
 #include <linux/extcon-provider.h>
 
@@ -448,8 +447,10 @@ static ssize_t bq24190_sysfs_show(struct device *dev,
                return -EINVAL;
 
        ret = pm_runtime_get_sync(bdi->dev);
-       if (ret < 0)
+       if (ret < 0) {
+               pm_runtime_put_noidle(bdi->dev);
                return ret;
+       }
 
        ret = bq24190_read_mask(bdi, info->reg, info->mask, info->shift, &v);
        if (ret)
@@ -1077,8 +1078,10 @@ static int bq24190_charger_get_property(struct power_supply *psy,
        dev_dbg(bdi->dev, "prop: %d\n", psp);
 
        ret = pm_runtime_get_sync(bdi->dev);
-       if (ret < 0)
+       if (ret < 0) {
+               pm_runtime_put_noidle(bdi->dev);
                return ret;
+       }
 
        switch (psp) {
        case POWER_SUPPLY_PROP_CHARGE_TYPE:
@@ -1149,8 +1152,10 @@ static int bq24190_charger_set_property(struct power_supply *psy,
        dev_dbg(bdi->dev, "prop: %d\n", psp);
 
        ret = pm_runtime_get_sync(bdi->dev);
-       if (ret < 0)
+       if (ret < 0) {
+               pm_runtime_put_noidle(bdi->dev);
                return ret;
+       }
 
        switch (psp) {
        case POWER_SUPPLY_PROP_ONLINE:
@@ -1410,8 +1415,10 @@ static int bq24190_battery_get_property(struct power_supply *psy,
        dev_dbg(bdi->dev, "prop: %d\n", psp);
 
        ret = pm_runtime_get_sync(bdi->dev);
-       if (ret < 0)
+       if (ret < 0) {
+               pm_runtime_put_noidle(bdi->dev);
                return ret;
+       }
 
        switch (psp) {
        case POWER_SUPPLY_PROP_STATUS:
@@ -1456,8 +1463,10 @@ static int bq24190_battery_set_property(struct power_supply *psy,
        dev_dbg(bdi->dev, "prop: %d\n", psp);
 
        ret = pm_runtime_get_sync(bdi->dev);
-       if (ret < 0)
+       if (ret < 0) {
+               pm_runtime_put_noidle(bdi->dev);
                return ret;
+       }
 
        switch (psp) {
        case POWER_SUPPLY_PROP_ONLINE:
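
The bq24190 hunks all plug the same leak: pm_runtime_get_sync() raises the device's usage count even when it fails. A sketch of the corrected pattern, hypothetical function name:

#include <linux/pm_runtime.h>

static int example_guarded_read(struct device *dev)
{
	int ret;

	ret = pm_runtime_get_sync(dev);
	if (ret < 0) {
		/*
		 * The count was bumped anyway; drop it here or it leaks
		 * and the device can never runtime-suspend again.
		 */
		pm_runtime_put_noidle(dev);
		return ret;
	}

	/* ... access the hardware while it is powered ... */

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);
	return 0;
}
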
index 6931e1d..ab2f4bf 100644 (file)
@@ -18,7 +18,6 @@
  */
 
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
index 34c21c5..945c325 100644 (file)
@@ -299,7 +299,7 @@ static const union {
        /* TODO: BQ25896 has max ICHG 3008 mA */
        [TBL_ICHG] =    { .rt = {0,       5056000, 64000} },     /* uA */
        [TBL_ITERM] =   { .rt = {64000,   1024000, 64000} },     /* uA */
-       [TBL_IILIM] =   { .rt = {50000,   3200000, 50000} },     /* uA */
+       [TBL_IILIM] =   { .rt = {100000,  3250000, 50000} },     /* uA */
        [TBL_VREG] =    { .rt = {3840000, 4608000, 16000} },     /* uV */
        [TBL_BOOSTV] =  { .rt = {4550000, 5510000, 64000} },     /* uV */
        [TBL_SYSVMIN] = { .rt = {3000000, 3700000, 100000} },    /* uV */
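
The one-line TBL_IILIM change shifts both ends of the range up while keeping the 50 mA step, which matters because these tables decode register codes linearly. A sketch of the decode and the off-by-one-step effect, assuming the usual {min, max, step} encoding:

#include <linux/types.h>

/* code -> value decode used by such {min, max, step} range tables */
static u32 example_tbl_to_ua(u32 min_ua, u32 step_ua, u8 code)
{
	return min_ua + code * step_ua;
}

/*
 * With the old base, code 0 decoded to 50000 uA although (per the new
 * table) the hardware's lowest input-current limit is 100000 uA, so
 * every reported and requested limit sat one 50 mA step too low.
 */
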
index cbd588e..7fb9b54 100644 (file)
@@ -12,7 +12,9 @@
 #include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
+#include <linux/gpio/machine.h>
+#include <linux/gpio/consumer.h>
 #include <linux/mfd/ucb1x00.h>
 
 #include <asm/mach/sharpsl_param.h>
@@ -31,18 +33,18 @@ struct collie_bat {
        struct mutex work_lock; /* protects data */
 
        bool (*is_present)(struct collie_bat *bat);
-       int gpio_full;
-       int gpio_charge_on;
+       struct gpio_desc *gpio_full;
+       struct gpio_desc *gpio_charge_on;
 
        int technology;
 
-       int gpio_bat;
+       struct gpio_desc *gpio_bat;
        int adc_bat;
        int adc_bat_divider;
        int bat_max;
        int bat_min;
 
-       int gpio_temp;
+       struct gpio_desc *gpio_temp;
        int adc_temp;
        int adc_temp_divider;
 };
@@ -53,15 +55,15 @@ static unsigned long collie_read_bat(struct collie_bat *bat)
 {
        unsigned long value = 0;
 
-       if (bat->gpio_bat < 0 || bat->adc_bat < 0)
+       if (!bat->gpio_bat || bat->adc_bat < 0)
                return 0;
        mutex_lock(&bat_lock);
-       gpio_set_value(bat->gpio_bat, 1);
+       gpiod_set_value(bat->gpio_bat, 1);
        msleep(5);
        ucb1x00_adc_enable(ucb);
        value = ucb1x00_adc_read(ucb, bat->adc_bat, UCB_SYNC);
        ucb1x00_adc_disable(ucb);
-       gpio_set_value(bat->gpio_bat, 0);
+       gpiod_set_value(bat->gpio_bat, 0);
        mutex_unlock(&bat_lock);
        value = value * 1000000 / bat->adc_bat_divider;
 
@@ -71,16 +73,16 @@ static unsigned long collie_read_bat(struct collie_bat *bat)
 static unsigned long collie_read_temp(struct collie_bat *bat)
 {
        unsigned long value = 0;
-       if (bat->gpio_temp < 0 || bat->adc_temp < 0)
+       if (!bat->gpio_temp || bat->adc_temp < 0)
                return 0;
 
        mutex_lock(&bat_lock);
-       gpio_set_value(bat->gpio_temp, 1);
+       gpiod_set_value(bat->gpio_temp, 1);
        msleep(5);
        ucb1x00_adc_enable(ucb);
        value = ucb1x00_adc_read(ucb, bat->adc_temp, UCB_SYNC);
        ucb1x00_adc_disable(ucb);
-       gpio_set_value(bat->gpio_temp, 0);
+       gpiod_set_value(bat->gpio_temp, 0);
        mutex_unlock(&bat_lock);
 
        value = value * 10000 / bat->adc_temp_divider;
@@ -162,23 +164,23 @@ static void collie_bat_update(struct collie_bat *bat)
                bat->full_chrg = -1;
        } else if (power_supply_am_i_supplied(psy)) {
                if (bat->status == POWER_SUPPLY_STATUS_DISCHARGING) {
-                       gpio_set_value(bat->gpio_charge_on, 1);
+                       gpiod_set_value(bat->gpio_charge_on, 1);
                        mdelay(15);
                }
 
-               if (gpio_get_value(bat->gpio_full)) {
+               if (gpiod_get_value(bat->gpio_full)) {
                        if (old == POWER_SUPPLY_STATUS_CHARGING ||
                                        bat->full_chrg == -1)
                                bat->full_chrg = collie_read_bat(bat);
 
-                       gpio_set_value(bat->gpio_charge_on, 0);
+                       gpiod_set_value(bat->gpio_charge_on, 0);
                        bat->status = POWER_SUPPLY_STATUS_FULL;
                } else {
-                       gpio_set_value(bat->gpio_charge_on, 1);
+                       gpiod_set_value(bat->gpio_charge_on, 1);
                        bat->status = POWER_SUPPLY_STATUS_CHARGING;
                }
        } else {
-               gpio_set_value(bat->gpio_charge_on, 0);
+               gpiod_set_value(bat->gpio_charge_on, 0);
                bat->status = POWER_SUPPLY_STATUS_DISCHARGING;
        }
 
@@ -230,18 +232,18 @@ static struct collie_bat collie_bat_main = {
        .full_chrg = -1,
        .psy = NULL,
 
-       .gpio_full = COLLIE_GPIO_CO,
-       .gpio_charge_on = COLLIE_GPIO_CHARGE_ON,
+       .gpio_full = NULL,
+       .gpio_charge_on = NULL,
 
        .technology = POWER_SUPPLY_TECHNOLOGY_LIPO,
 
-       .gpio_bat = COLLIE_GPIO_MBAT_ON,
+       .gpio_bat = NULL,
        .adc_bat = UCB_ADC_INP_AD1,
        .adc_bat_divider = 155,
        .bat_max = 4310000,
        .bat_min = 1551 * 1000000 / 414,
 
-       .gpio_temp = COLLIE_GPIO_TMP_ON,
+       .gpio_temp = NULL,
        .adc_temp = UCB_ADC_INP_AD0,
        .adc_temp_divider = 10000,
 };
@@ -260,30 +262,24 @@ static struct collie_bat collie_bat_bu = {
        .full_chrg = -1,
        .psy = NULL,
 
-       .gpio_full = -1,
-       .gpio_charge_on = -1,
+       .gpio_full = NULL,
+       .gpio_charge_on = NULL,
 
        .technology = POWER_SUPPLY_TECHNOLOGY_LiMn,
 
-       .gpio_bat = COLLIE_GPIO_BBAT_ON,
+       .gpio_bat = NULL,
        .adc_bat = UCB_ADC_INP_AD1,
        .adc_bat_divider = 155,
        .bat_max = 3000000,
        .bat_min = 1900000,
 
-       .gpio_temp = -1,
+       .gpio_temp = NULL,
        .adc_temp = -1,
        .adc_temp_divider = -1,
 };
 
-static struct gpio collie_batt_gpios[] = {
-       { COLLIE_GPIO_CO,           GPIOF_IN,           "main battery full" },
-       { COLLIE_GPIO_MAIN_BAT_LOW, GPIOF_IN,           "main battery low" },
-       { COLLIE_GPIO_CHARGE_ON,    GPIOF_OUT_INIT_LOW, "main charge on" },
-       { COLLIE_GPIO_MBAT_ON,      GPIOF_OUT_INIT_LOW, "main battery" },
-       { COLLIE_GPIO_TMP_ON,       GPIOF_OUT_INIT_LOW, "main battery temp" },
-       { COLLIE_GPIO_BBAT_ON,      GPIOF_OUT_INIT_LOW, "backup battery" },
-};
+/* Obtained but unused GPIO */
+static struct gpio_desc *collie_mbat_low;
 
 #ifdef CONFIG_PM
 static int wakeup_enabled;
@@ -295,7 +291,7 @@ static int collie_bat_suspend(struct ucb1x00_dev *dev)
 
        if (device_may_wakeup(&dev->ucb->dev) &&
            collie_bat_main.status == POWER_SUPPLY_STATUS_CHARGING)
-               wakeup_enabled = !enable_irq_wake(gpio_to_irq(COLLIE_GPIO_CO));
+               wakeup_enabled = !enable_irq_wake(gpiod_to_irq(collie_bat_main.gpio_full));
        else
                wakeup_enabled = 0;
 
@@ -305,7 +301,7 @@ static int collie_bat_suspend(struct ucb1x00_dev *dev)
 static int collie_bat_resume(struct ucb1x00_dev *dev)
 {
        if (wakeup_enabled)
-               disable_irq_wake(gpio_to_irq(COLLIE_GPIO_CO));
+               disable_irq_wake(gpiod_to_irq(collie_bat_main.gpio_full));
 
        /* things may have changed while we were away */
        schedule_work(&bat_work);
@@ -320,16 +316,71 @@ static int collie_bat_probe(struct ucb1x00_dev *dev)
 {
        int ret;
        struct power_supply_config psy_main_cfg = {}, psy_bu_cfg = {};
+       struct gpio_chip *gc = &dev->ucb->gpio;
 
        if (!machine_is_collie())
                return -ENODEV;
 
        ucb = dev->ucb;
 
-       ret = gpio_request_array(collie_batt_gpios,
-                                ARRAY_SIZE(collie_batt_gpios));
-       if (ret)
-               return ret;
+       /* Obtain all the main battery GPIOs */
+       collie_bat_main.gpio_full = gpiod_get(&dev->ucb->dev,
+                                             "main battery full",
+                                             GPIOD_IN);
+       if (IS_ERR(collie_bat_main.gpio_full))
+               return PTR_ERR(collie_bat_main.gpio_full);
+
+       collie_mbat_low = gpiod_get(&dev->ucb->dev,
+                                   "main battery low",
+                                   GPIOD_IN);
+       if (IS_ERR(collie_mbat_low)) {
+               ret = PTR_ERR(collie_mbat_low);
+               goto err_put_gpio_full;
+       }
+
+       collie_bat_main.gpio_charge_on = gpiod_get(&dev->ucb->dev,
+                                                  "main charge on",
+                                                  GPIOD_OUT_LOW);
+       if (IS_ERR(collie_bat_main.gpio_charge_on)) {
+               ret = PTR_ERR(collie_bat_main.gpio_charge_on);
+               goto err_put_mbat_low;
+       }
+
+       /* COLLIE_GPIO_MBAT_ON = GPIO 7 on the UCB (TC35143) */
+       collie_bat_main.gpio_bat = gpiochip_request_own_desc(gc,
+                                               7,
+                                               "main battery",
+                                               GPIO_ACTIVE_HIGH,
+                                               GPIOD_OUT_LOW);
+       if (IS_ERR(collie_bat_main.gpio_bat)) {
+               ret = PTR_ERR(collie_bat_main.gpio_bat);
+               goto err_put_gpio_charge_on;
+       }
+
+       /* COLLIE_GPIO_TMP_ON = GPIO 9 on the UCB (TC35143) */
+       collie_bat_main.gpio_temp = gpiochip_request_own_desc(gc,
+                                               9,
+                                               "main battery temp",
+                                               GPIO_ACTIVE_HIGH,
+                                               GPIOD_OUT_LOW);
+       if (IS_ERR(collie_bat_main.gpio_temp)) {
+               ret = PTR_ERR(collie_bat_main.gpio_temp);
+               goto err_free_gpio_bat;
+       }
+
+       /*
+        * Obtain the backup battery COLLIE_GPIO_BBAT_ON which is
+        * GPIO 8 on the UCB (TC35143)
+        */
+       collie_bat_bu.gpio_bat = gpiochip_request_own_desc(gc,
+                                               8,
+                                               "backup battery",
+                                               GPIO_ACTIVE_HIGH,
+                                               GPIOD_OUT_LOW);
+       if (IS_ERR(collie_bat_bu.gpio_bat)) {
+               ret = PTR_ERR(collie_bat_bu.gpio_bat);
+               goto err_free_gpio_temp;
+       }
 
        mutex_init(&collie_bat_main.work_lock);
 
@@ -370,27 +421,43 @@ err_irq:
 err_psy_reg_bu:
        power_supply_unregister(collie_bat_main.psy);
 err_psy_reg_main:
-
        /* see comment in collie_bat_remove */
        cancel_work_sync(&bat_work);
-       gpio_free_array(collie_batt_gpios, ARRAY_SIZE(collie_batt_gpios));
+       gpiochip_free_own_desc(collie_bat_bu.gpio_bat);
+err_free_gpio_temp:
+       gpiochip_free_own_desc(collie_bat_main.gpio_temp);
+err_free_gpio_bat:
+       gpiochip_free_own_desc(collie_bat_main.gpio_bat);
+err_put_gpio_charge_on:
+       gpiod_put(collie_bat_main.gpio_charge_on);
+err_put_mbat_low:
+       gpiod_put(collie_mbat_low);
+err_put_gpio_full:
+       gpiod_put(collie_bat_main.gpio_full);
+
        return ret;
 }
 
 static void collie_bat_remove(struct ucb1x00_dev *dev)
 {
        free_irq(gpio_to_irq(COLLIE_GPIO_CO), &collie_bat_main);
-
        power_supply_unregister(collie_bat_bu.psy);
        power_supply_unregister(collie_bat_main.psy);
 
+       /* These are obtained from the machine */
+       gpiod_put(collie_bat_main.gpio_full);
+       gpiod_put(collie_mbat_low);
+       gpiod_put(collie_bat_main.gpio_charge_on);
+       /* These are directly from the UCB so let's free them */
+       gpiochip_free_own_desc(collie_bat_main.gpio_bat);
+       gpiochip_free_own_desc(collie_bat_main.gpio_temp);
+       gpiochip_free_own_desc(collie_bat_bu.gpio_bat);
        /*
         * Now cancel the bat_work.  We won't get any more schedules,
         * since all sources (isr and external_power_changed) are
         * unregistered now.
         */
        cancel_work_sync(&bat_work);
-       gpio_free_array(collie_batt_gpios, ARRAY_SIZE(collie_batt_gpios));
 }
 
 static struct ucb1x00_driver collie_bat_driver = {
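
The collie conversion needs two descriptor sources: board-level lines come from gpiod_get(), but pins that live on the UCB1x00's own gpio_chip are claimed straight off that chip. A sketch of the latter call, hypothetical names:

#include <linux/gpio/consumer.h>
#include <linux/gpio/driver.h>
#include <linux/gpio/machine.h>

/*
 * gpiochip_request_own_desc() hands out a descriptor by hardware
 * offset on a chip the caller already owns, bypassing the consumer
 * lookup tables; such descriptors are released with
 * gpiochip_free_own_desc(), not gpiod_put().
 */
static struct gpio_desc *example_claim_pin(struct gpio_chip *gc)
{
	return gpiochip_request_own_desc(gc, 7, "example pin",
					 GPIO_ACTIVE_HIGH, GPIOD_OUT_LOW);
}
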
index caa8297..0032069 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/err.h>
 #include <linux/timer.h>
 #include <linux/jiffies.h>
@@ -52,6 +52,7 @@ struct gab {
        int     level;
        int     status;
        bool cable_plugged;
+       struct gpio_desc *charge_finished;
 };
 
 static struct gab *to_generic_bat(struct power_supply *psy)
@@ -91,13 +92,9 @@ static const enum power_supply_property gab_dyn_props[] = {
 
 static bool gab_charge_finished(struct gab *adc_bat)
 {
-       struct gab_platform_data *pdata = adc_bat->pdata;
-       bool ret = gpio_get_value(pdata->gpio_charge_finished);
-       bool inv = pdata->gpio_inverted;
-
-       if (!gpio_is_valid(pdata->gpio_charge_finished))
+       if (!adc_bat->charge_finished)
                return false;
-       return ret ^ inv;
+       return gpiod_get_value(adc_bat->charge_finished);
 }
 
 static int gab_get_status(struct gab *adc_bat)
@@ -327,18 +324,17 @@ static int gab_probe(struct platform_device *pdev)
 
        INIT_DELAYED_WORK(&adc_bat->bat_work, gab_work);
 
-       if (gpio_is_valid(pdata->gpio_charge_finished)) {
+       adc_bat->charge_finished = devm_gpiod_get_optional(&pdev->dev,
+                                                          "charged", GPIOD_IN);
+       if (adc_bat->charge_finished) {
                int irq;
-               ret = gpio_request(pdata->gpio_charge_finished, "charged");
-               if (ret)
-                       goto gpio_req_fail;
 
-               irq = gpio_to_irq(pdata->gpio_charge_finished);
+               irq = gpiod_to_irq(adc_bat->charge_finished);
                ret = request_any_context_irq(irq, gab_charged,
                                IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
                                "battery charged", adc_bat);
                if (ret < 0)
-                       goto err_gpio;
+                       goto gpio_req_fail;
        }
 
        platform_set_drvdata(pdev, adc_bat);
@@ -348,8 +344,6 @@ static int gab_probe(struct platform_device *pdev)
                        msecs_to_jiffies(0));
        return 0;
 
-err_gpio:
-       gpio_free(pdata->gpio_charge_finished);
 gpio_req_fail:
        power_supply_unregister(adc_bat->psy);
 err_reg_fail:
@@ -367,14 +361,11 @@ static int gab_remove(struct platform_device *pdev)
 {
        int chan;
        struct gab *adc_bat = platform_get_drvdata(pdev);
-       struct gab_platform_data *pdata = adc_bat->pdata;
 
        power_supply_unregister(adc_bat->psy);
 
-       if (gpio_is_valid(pdata->gpio_charge_finished)) {
-               free_irq(gpio_to_irq(pdata->gpio_charge_finished), adc_bat);
-               gpio_free(pdata->gpio_charge_finished);
-       }
+       if (adc_bat->charge_finished)
+               free_irq(gpiod_to_irq(adc_bat->charge_finished), adc_bat);
 
        for (chan = 0; chan < ARRAY_SIZE(gab_chan_name); chan++) {
                if (adc_bat->channel[chan])
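
The same descriptor conversion appears here and in the s3c_adc_battery diff below: devm_gpiod_get_optional() makes both the validity check and the polarity flag disappear. A sketch with a hypothetical helper:

#include <linux/gpio/consumer.h>

/*
 * devm_gpiod_get_optional() returns NULL when no "charged" line is
 * described (so the old gpio_is_valid() test becomes a NULL check) and
 * an ERR_PTR such as -EPROBE_DEFER on real failure; gpiod_get_value()
 * already applies active-low polarity, replacing the hand-rolled
 * gpio_inverted XOR.
 */
static bool example_charge_finished(struct gpio_desc *charged)
{
	if (!charged)
		return false;
	return gpiod_get_value(charged);
}
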
index f284547..79d4b59 100644 (file)
@@ -78,6 +78,7 @@ static enum power_supply_property max17042_battery_props[] = {
        POWER_SUPPLY_PROP_CHARGE_FULL,
        POWER_SUPPLY_PROP_CHARGE_NOW,
        POWER_SUPPLY_PROP_CHARGE_COUNTER,
+       POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT,
        POWER_SUPPLY_PROP_TEMP,
        POWER_SUPPLY_PROP_TEMP_ALERT_MIN,
        POWER_SUPPLY_PROP_TEMP_ALERT_MAX,
@@ -85,9 +86,10 @@ static enum power_supply_property max17042_battery_props[] = {
        POWER_SUPPLY_PROP_TEMP_MAX,
        POWER_SUPPLY_PROP_HEALTH,
        POWER_SUPPLY_PROP_SCOPE,
+       POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
+       // these two have to be at the end of the list
        POWER_SUPPLY_PROP_CURRENT_NOW,
        POWER_SUPPLY_PROP_CURRENT_AVG,
-       POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW,
 };
 
 static int max17042_get_temperature(struct max17042_chip *chip, int *temp)
@@ -353,7 +355,8 @@ static int max17042_get_property(struct power_supply *psy,
                if (ret < 0)
                        return ret;
 
-               val->intval = data * 1000 / 2;
+               data64 = sign_extend64(data, 15) * 5000000ll;
+               val->intval = div_s64(data64, chip->pdata->r_sns);
                break;
        case POWER_SUPPLY_PROP_TEMP:
                ret = max17042_get_temperature(chip, &val->intval);
@@ -394,8 +397,8 @@ static int max17042_get_property(struct power_supply *psy,
                        if (ret < 0)
                                return ret;
 
-                       val->intval = sign_extend32(data, 15);
-                       val->intval *= 1562500 / chip->pdata->r_sns;
+                       data64 = sign_extend64(data, 15) * 1562500ll;
+                       val->intval = div_s64(data64, chip->pdata->r_sns);
                } else {
                        return -EINVAL;
                }
@@ -406,12 +409,20 @@ static int max17042_get_property(struct power_supply *psy,
                        if (ret < 0)
                                return ret;
 
-                       val->intval = sign_extend32(data, 15);
-                       val->intval *= 1562500 / chip->pdata->r_sns;
+                       data64 = sign_extend64(data, 15) * 1562500ll;
+                       val->intval = div_s64(data64, chip->pdata->r_sns);
                } else {
                        return -EINVAL;
                }
                break;
+       case POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT:
+               ret = regmap_read(map, MAX17042_ICHGTerm, &data);
+               if (ret < 0)
+                       return ret;
+
+               data64 = data * 1562500ll;
+               val->intval = div_s64(data64, chip->pdata->r_sns);
+               break;
        case POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW:
                ret = regmap_read(map, MAX17042_TTE, &data);
                if (ret < 0)
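
The max17042 current conversions move to 64-bit math before the divide. A sketch of the microamp conversion these hunks implement, with hypothetical names (the register LSB is 1.5625 uV across the sense resistor):

#include <linux/bitops.h>
#include <linux/math64.h>

/*
 * sign_extend64() widens the signed 16-bit reading first, so the
 * product cannot overflow, and div_s64() divides afterwards --
 * unlike the old "1562500 / r_sns" pre-division, which truncated
 * whenever r_sns (in micro-ohms) did not divide 1562500 evenly.
 */
static int example_current_reg_to_ua(u16 raw, u32 r_sns_uohm)
{
	s64 tmp = sign_extend64(raw, 15) * 1562500LL;

	return div_s64(tmp, r_sns_uohm);
}
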
index f5e84cd..1947af2 100644 (file)
 #include <linux/mfd/max8997.h>
 #include <linux/mfd/max8997-private.h>
 
+/* MAX8997_REG_STATUS4 */
+#define DCINOK_SHIFT           1
+#define DCINOK_MASK            (1 << DCINOK_SHIFT)
+#define DETBAT_SHIFT           2
+#define DETBAT_MASK            (1 << DETBAT_SHIFT)
+
+/* MAX8997_REG_MBCCTRL1 */
+#define TFCH_SHIFT             4
+#define TFCH_MASK              (7 << TFCH_SHIFT)
+
+/* MAX8997_REG_MBCCTRL5 */
+#define ITOPOFF_SHIFT          0
+#define ITOPOFF_MASK           (0xF << ITOPOFF_SHIFT)
+
 struct charger_data {
        struct device *dev;
        struct max8997_dev *iodev;
@@ -20,7 +34,7 @@ struct charger_data {
 };
 
 static enum power_supply_property max8997_battery_props[] = {
-       POWER_SUPPLY_PROP_STATUS, /* "FULL" or "NOT FULL" only. */
+       POWER_SUPPLY_PROP_STATUS, /* "FULL", "CHARGING" or "DISCHARGING". */
        POWER_SUPPLY_PROP_PRESENT, /* the presence of battery */
        POWER_SUPPLY_PROP_ONLINE, /* charger is active or not */
 };
@@ -43,6 +57,10 @@ static int max8997_battery_get_property(struct power_supply *psy,
                        return ret;
                if ((reg & (1 << 0)) == 0x1)
                        val->intval = POWER_SUPPLY_STATUS_FULL;
+               else if ((reg & DCINOK_MASK))
+                       val->intval = POWER_SUPPLY_STATUS_CHARGING;
+               else
+                       val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
 
                break;
        case POWER_SUPPLY_PROP_PRESENT:
@@ -50,7 +68,7 @@ static int max8997_battery_get_property(struct power_supply *psy,
                ret = max8997_read_reg(i2c, MAX8997_REG_STATUS4, &reg);
                if (ret)
                        return ret;
-               if ((reg & (1 << 2)) == 0x0)
+               if ((reg & DETBAT_MASK) == 0x0)
                        val->intval = 1;
 
                break;
@@ -59,8 +77,7 @@ static int max8997_battery_get_property(struct power_supply *psy,
                ret = max8997_read_reg(i2c, MAX8997_REG_STATUS4, &reg);
                if (ret)
                        return ret;
-               /* DCINOK */
-               if (reg & (1 << 1))
+               if (reg & DCINOK_MASK)
                        val->intval = 1;
 
                break;
@@ -84,11 +101,14 @@ static int max8997_battery_probe(struct platform_device *pdev)
        int ret = 0;
        struct charger_data *charger;
        struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent);
-       struct max8997_platform_data *pdata = dev_get_platdata(iodev->dev);
+       struct i2c_client *i2c = iodev->i2c;
+       struct max8997_platform_data *pdata = iodev->pdata;
        struct power_supply_config psy_cfg = {};
 
-       if (!pdata)
+       if (!pdata) {
+               dev_err(&pdev->dev, "No platform data supplied.\n");
                return -EINVAL;
+       }
 
        if (pdata->eoc_mA) {
                int val = (pdata->eoc_mA - 50) / 10;
@@ -97,30 +117,29 @@ static int max8997_battery_probe(struct platform_device *pdev)
                if (val > 0xf)
                        val = 0xf;
 
-               ret = max8997_update_reg(iodev->i2c,
-                               MAX8997_REG_MBCCTRL5, val, 0xf);
+               ret = max8997_update_reg(i2c, MAX8997_REG_MBCCTRL5,
+                               val << ITOPOFF_SHIFT, ITOPOFF_MASK);
                if (ret < 0) {
                        dev_err(&pdev->dev, "Cannot use i2c bus.\n");
                        return ret;
                }
        }
-
        switch (pdata->timeout) {
        case 5:
-               ret = max8997_update_reg(iodev->i2c, MAX8997_REG_MBCCTRL1,
-                               0x2 << 4, 0x7 << 4);
+               ret = max8997_update_reg(i2c, MAX8997_REG_MBCCTRL1,
+                               0x2 << TFCH_SHIFT, TFCH_MASK);
                break;
        case 6:
-               ret = max8997_update_reg(iodev->i2c, MAX8997_REG_MBCCTRL1,
-                               0x3 << 4, 0x7 << 4);
+               ret = max8997_update_reg(i2c, MAX8997_REG_MBCCTRL1,
+                               0x3 << TFCH_SHIFT, TFCH_MASK);
                break;
        case 7:
-               ret = max8997_update_reg(iodev->i2c, MAX8997_REG_MBCCTRL1,
-                               0x4 << 4, 0x7 << 4);
+               ret = max8997_update_reg(i2c, MAX8997_REG_MBCCTRL1,
+                               0x4 << TFCH_SHIFT, TFCH_MASK);
                break;
        case 0:
-               ret = max8997_update_reg(iodev->i2c, MAX8997_REG_MBCCTRL1,
-                               0x7 << 4, 0x7 << 4);
+               ret = max8997_update_reg(i2c, MAX8997_REG_MBCCTRL1,
+                               0x7 << TFCH_SHIFT, TFCH_MASK);
                break;
        default:
                dev_err(&pdev->dev, "incorrect timeout value (%d)\n",
@@ -138,7 +157,6 @@ static int max8997_battery_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, charger);
 
-
        charger->dev = &pdev->dev;
        charger->iodev = iodev;
 
@@ -168,18 +186,7 @@ static struct platform_driver max8997_battery_driver = {
        .probe = max8997_battery_probe,
        .id_table = max8997_battery_id,
 };
-
-static int __init max8997_battery_init(void)
-{
-       return platform_driver_register(&max8997_battery_driver);
-}
-subsys_initcall(max8997_battery_init);
-
-static void __exit max8997_battery_cleanup(void)
-{
-       platform_driver_unregister(&max8997_battery_driver);
-}
-module_exit(max8997_battery_cleanup);
+module_platform_driver(max8997_battery_driver);
 
 MODULE_DESCRIPTION("MAXIM 8997/8966 battery control driver");
 MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
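
The closing max8997 hunk replaces the hand-rolled init/exit pair with module_platform_driver(), which expands to essentially the deleted boilerplate; the visible difference is that registration moves from subsys_initcall() to ordinary module_init() time, an ordering that deferred probing tolerates. Roughly, for a hypothetical driver:

#include <linux/module.h>
#include <linux/platform_device.h>

static struct platform_driver example_driver;

/* What module_platform_driver(example_driver) expands to, roughly: */
static int __init example_driver_init(void)
{
	return platform_driver_register(&example_driver);
}
module_init(example_driver_init);

static void __exit example_driver_exit(void)
{
	platform_driver_unregister(&example_driver);
}
module_exit(example_driver_exit);
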
index 2df6a24..ac06ecf 100644 (file)
@@ -455,7 +455,6 @@ static int pm2_int_reg4(void *pm2_data, int val)
 static int pm2_int_reg5(void *pm2_data, int val)
 {
        struct pm2xxx_charger *pm2 = pm2_data;
-       int ret = 0;
 
        if (val & (PM2XXX_INT6_ITVPWR2DROP | PM2XXX_INT6_ITVPWR1DROP)) {
                dev_dbg(pm2->dev, "VMPWR drop to VBAT level\n");
@@ -468,7 +467,7 @@ static int pm2_int_reg5(void *pm2_data, int val)
                dev_dbg(pm2->dev, "Falling/Rising edge on WPWR1/2\n");
        }
 
-       return ret;
+       return 0;
 }
 
 static irqreturn_t  pm2xxx_irq_int(int irq, void *data)
index a616b9d..92dd631 100644 (file)
@@ -402,7 +402,7 @@ void power_supply_init_attrs(struct device_type *dev_type)
                struct device_attribute *attr;
 
                if (!power_supply_attrs[i].prop_name) {
-                       pr_warn("%s: Property %d skipped because is is missing from power_supply_attrs\n",
+                       pr_warn("%s: Property %d skipped because it is missing from power_supply_attrs\n",
                                __func__, i);
                        sprintf(power_supply_attrs[i].attr_name, "_err_%d", i);
                } else {
index 60b7f41..a2addc2 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/leds.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/err.h>
 #include <linux/timer.h>
 #include <linux/jiffies.h>
@@ -31,6 +31,7 @@ struct s3c_adc_bat {
        struct power_supply             *psy;
        struct s3c_adc_client           *client;
        struct s3c_adc_bat_pdata        *pdata;
+       struct gpio_desc                *charge_finished;
        int                             volt_value;
        int                             cur_value;
        unsigned int                    timestamp;
@@ -132,9 +133,7 @@ static int calc_full_volt(int volt_val, int cur_val, int impedance)
 
 static int charge_finished(struct s3c_adc_bat *bat)
 {
-       return bat->pdata->gpio_inverted ?
-               !gpio_get_value(bat->pdata->gpio_charge_finished) :
-               gpio_get_value(bat->pdata->gpio_charge_finished);
+       return gpiod_get_value(bat->charge_finished);
 }
 
 static int s3c_adc_bat_get_property(struct power_supply *psy,
@@ -169,7 +168,7 @@ static int s3c_adc_bat_get_property(struct power_supply *psy,
        }
 
        if (bat->cable_plugged &&
-               ((bat->pdata->gpio_charge_finished < 0) ||
+               (!bat->charge_finished ||
                !charge_finished(bat))) {
                lut = bat->pdata->lut_acin;
                lut_size = bat->pdata->lut_acin_cnt;
@@ -206,7 +205,7 @@ static int s3c_adc_bat_get_property(struct power_supply *psy,
 
        switch (psp) {
        case POWER_SUPPLY_PROP_STATUS:
-               if (bat->pdata->gpio_charge_finished < 0)
+               if (!bat->charge_finished)
                        val->intval = bat->level == 100000 ?
                                POWER_SUPPLY_STATUS_FULL : bat->status;
                else
@@ -265,7 +264,7 @@ static void s3c_adc_bat_work(struct work_struct *work)
                        bat->status = POWER_SUPPLY_STATUS_DISCHARGING;
                }
        } else {
-               if ((bat->pdata->gpio_charge_finished >= 0) && is_plugged) {
+               if (bat->charge_finished && is_plugged) {
                        is_charged = charge_finished(&main_bat);
                        if (is_charged) {
                                if (bat->pdata->disable_charger)
@@ -294,6 +293,7 @@ static int s3c_adc_bat_probe(struct platform_device *pdev)
        struct s3c_adc_client   *client;
        struct s3c_adc_bat_pdata *pdata = pdev->dev.platform_data;
        struct power_supply_config psy_cfg = {};
+       struct gpio_desc *gpiod;
        int ret;
 
        client = s3c_adc_register(pdev, NULL, NULL, 0);
@@ -304,8 +304,17 @@ static int s3c_adc_bat_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, client);
 
+       gpiod = devm_gpiod_get_optional(&pdev->dev, "charge-status", GPIOD_IN);
+       if (IS_ERR(gpiod)) {
+               /* Could be probe deferral etc */
+               ret = PTR_ERR(gpiod);
+               dev_err(&pdev->dev, "failed to get charge-status GPIO: %d\n", ret);
+               return ret;
+       }
+
        main_bat.client = client;
        main_bat.pdata = pdata;
+       main_bat.charge_finished = gpiod;
        main_bat.volt_value = -1;
        main_bat.cur_value = -1;
        main_bat.cable_plugged = 0;
@@ -323,6 +332,7 @@ static int s3c_adc_bat_probe(struct platform_device *pdev)
 
                backup_bat.client = client;
                backup_bat.pdata = pdev->dev.platform_data;
+               backup_bat.charge_finished = gpiod;
                backup_bat.volt_value = -1;
                backup_bat.psy = power_supply_register(&pdev->dev,
                                                       &backup_bat_desc,
@@ -335,12 +345,8 @@ static int s3c_adc_bat_probe(struct platform_device *pdev)
 
        INIT_DELAYED_WORK(&bat_work, s3c_adc_bat_work);
 
-       if (pdata->gpio_charge_finished >= 0) {
-               ret = gpio_request(pdata->gpio_charge_finished, "charged");
-               if (ret)
-                       goto err_gpio;
-
-               ret = request_irq(gpio_to_irq(pdata->gpio_charge_finished),
+       if (gpiod) {
+               ret = request_irq(gpiod_to_irq(gpiod),
                                s3c_adc_bat_charged,
                                IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
                                "battery charged", NULL);
@@ -364,12 +370,9 @@ static int s3c_adc_bat_probe(struct platform_device *pdev)
        return 0;
 
 err_platform:
-       if (pdata->gpio_charge_finished >= 0)
-               free_irq(gpio_to_irq(pdata->gpio_charge_finished), NULL);
+       if (gpiod)
+               free_irq(gpiod_to_irq(gpiod), NULL);
 err_irq:
-       if (pdata->gpio_charge_finished >= 0)
-               gpio_free(pdata->gpio_charge_finished);
-err_gpio:
        if (pdata->backup_volt_mult)
                power_supply_unregister(backup_bat.psy);
 err_reg_backup:
@@ -389,10 +392,8 @@ static int s3c_adc_bat_remove(struct platform_device *pdev)
 
        s3c_adc_release(client);
 
-       if (pdata->gpio_charge_finished >= 0) {
-               free_irq(gpio_to_irq(pdata->gpio_charge_finished), NULL);
-               gpio_free(pdata->gpio_charge_finished);
-       }
+       if (main_bat.charge_finished)
+               free_irq(gpiod_to_irq(main_bat.charge_finished), NULL);
 
        cancel_delayed_work(&bat_work);
 
@@ -408,12 +409,12 @@ static int s3c_adc_bat_suspend(struct platform_device *pdev,
 {
        struct s3c_adc_bat_pdata *pdata = pdev->dev.platform_data;
 
-       if (pdata->gpio_charge_finished >= 0) {
+       if (main_bat.charge_finished) {
                if (device_may_wakeup(&pdev->dev))
                        enable_irq_wake(
-                               gpio_to_irq(pdata->gpio_charge_finished));
+                               gpiod_to_irq(main_bat.charge_finished));
                else {
-                       disable_irq(gpio_to_irq(pdata->gpio_charge_finished));
+                       disable_irq(gpiod_to_irq(main_bat.charge_finished));
                        main_bat.pdata->disable_charger();
                }
        }
@@ -425,12 +426,12 @@ static int s3c_adc_bat_resume(struct platform_device *pdev)
 {
        struct s3c_adc_bat_pdata *pdata = pdev->dev.platform_data;
 
-       if (pdata->gpio_charge_finished >= 0) {
+       if (main_bat.charge_finished) {
                if (device_may_wakeup(&pdev->dev))
                        disable_irq_wake(
-                               gpio_to_irq(pdata->gpio_charge_finished));
+                               gpiod_to_irq(main_bat.charge_finished));
                else
-                       enable_irq(gpio_to_irq(pdata->gpio_charge_finished));
+                       enable_irq(gpiod_to_irq(main_bat.charge_finished));
        }
 
        /* Schedule timer to check current status */
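
The s3c_adc_battery conversion above swaps the integer-based legacy GPIO calls (gpio_request(), gpio_get_value() and the hand-rolled gpio_inverted flag) for GPIO descriptors: devm_gpiod_get_optional() returns NULL when no line is described, an error pointer (including -EPROBE_DEFER) on failure, and gpiod_get_value() already folds in the active-low polarity from the device tree. A minimal sketch of the same pattern in isolation (handle_charge_finished() is hypothetical):

	struct gpio_desc *gpiod;

	/* NULL if the optional line is absent; IS_ERR() on real failures */
	gpiod = devm_gpiod_get_optional(dev, "charge-status", GPIOD_IN);
	if (IS_ERR(gpiod))
		return PTR_ERR(gpiod);

	if (gpiod && gpiod_get_value(gpiod))	/* polarity handled by gpiolib */
		handle_charge_finished();
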
index 18b33f1..4cd2dd8 100644 (file)
@@ -668,7 +668,6 @@ static int wm831x_power_probe(struct platform_device *pdev)
                fallthrough;
        case -EPROBE_DEFER:
                goto err_bat_irq;
-               break;
        }
 
        return ret;
index 63be536..0937e1c 100644 (file)
@@ -53,8 +53,8 @@ config PWM_AB8500
 
 config PWM_ATMEL
        tristate "Atmel PWM support"
-       depends on OF
        depends on ARCH_AT91 || COMPILE_TEST
+       depends on HAS_IOMEM && OF
        help
          Generic PWM framework driver for Atmel SoC.
 
@@ -75,7 +75,8 @@ config PWM_ATMEL_HLCDC_PWM
 
 config PWM_ATMEL_TCB
        tristate "Atmel TC Block PWM support"
-       depends on ATMEL_TCLIB && OF
+       depends on OF
+       select REGMAP_MMIO
        help
          Generic PWM framework driver for Atmel Timer Counter Block.
 
@@ -88,7 +89,7 @@ config PWM_ATMEL_TCB
 config PWM_BCM_IPROC
        tristate "iProc PWM support"
        depends on ARCH_BCM_IPROC || COMPILE_TEST
-       depends on COMMON_CLK
+       depends on COMMON_CLK && HAS_IOMEM
        default ARCH_BCM_IPROC
        help
          Generic PWM framework driver for Broadcom iProc PWM block. This
@@ -111,6 +112,7 @@ config PWM_BCM_KONA
 config PWM_BCM2835
        tristate "BCM2835 PWM support"
        depends on ARCH_BCM2835 || ARCH_BRCMSTB || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          PWM framework driver for BCM2835 controller (Raspberry Pi)
 
@@ -120,6 +122,7 @@ config PWM_BCM2835
 config PWM_BERLIN
        tristate "Marvell Berlin PWM support"
        depends on ARCH_BERLIN || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          PWM framework driver for Marvell Berlin SoCs.
 
@@ -129,6 +132,7 @@ config PWM_BERLIN
 config PWM_BRCMSTB
        tristate "Broadcom STB PWM support"
        depends on ARCH_BRCMSTB || BMIPS_GENERIC || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for the Broadcom Set-top-Box
          SoCs (BCM7xxx).
@@ -160,9 +164,19 @@ config PWM_CROS_EC
          PWM driver for exposing a PWM attached to the ChromeOS Embedded
          Controller.
 
+config PWM_DWC
+       tristate "DesignWare PWM Controller"
+       depends on PCI
+       help
+         PWM driver for Synopsys DWC PWM Controller attached to a PCI bus.
+
+         To compile this driver as a module, choose M here: the module
+         will be called pwm-dwc.
+
 config PWM_EP93XX
        tristate "Cirrus Logic EP93xx PWM support"
        depends on ARCH_EP93XX || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for Cirrus Logic EP93xx.
 
@@ -184,6 +198,7 @@ config PWM_FSL_FTM
 config PWM_HIBVT
        tristate "HiSilicon BVT PWM support"
        depends on ARCH_HISI || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for HiSilicon BVT SoCs.
 
@@ -206,6 +221,7 @@ config PWM_IMG
 config PWM_IMX1
        tristate "i.MX1 PWM support"
        depends on ARCH_MXC || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for i.MX1 and i.MX21
 
@@ -215,6 +231,7 @@ config PWM_IMX1
 config PWM_IMX27
        tristate "i.MX27 PWM support"
        depends on ARCH_MXC || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for i.MX27 and later i.MX SoCs.
 
@@ -232,6 +249,17 @@ config PWM_IMX_TPM
          To compile this driver as a module, choose M here: the module
          will be called pwm-imx-tpm.
 
+config PWM_INTEL_LGM
+       tristate "Intel LGM PWM support"
+       depends on HAS_IOMEM
+       depends on (OF && X86) || COMPILE_TEST
+       select REGMAP_MMIO
+       help
+         Generic PWM fan controller driver for LGM SoC.
+
+         To compile this driver as a module, choose M here: the module
+         will be called pwm-intel-lgm.
+
 config PWM_IQS620A
        tristate "Azoteq IQS620A PWM support"
        depends on MFD_IQS62X || COMPILE_TEST
@@ -254,6 +282,15 @@ config PWM_JZ4740
          To compile this driver as a module, choose M here: the module
          will be called pwm-jz4740.
 
+config PWM_KEEMBAY
+       tristate "Intel Keem Bay PWM driver"
+       depends on ARCH_KEEMBAY || (ARM64 && COMPILE_TEST)
+       help
+         The platform driver for Intel Keem Bay PWM controller.
+
+         To compile this driver as a module, choose M here: the module
+         will be called pwm-keembay.
+
 config PWM_LP3943
        tristate "TI/National Semiconductor LP3943 PWM support"
        depends on MFD_LP3943
@@ -267,6 +304,7 @@ config PWM_LP3943
 config PWM_LPC18XX_SCT
        tristate "LPC18xx/43xx PWM/SCT support"
        depends on ARCH_LPC18XX || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for NXP LPC18xx PWM/SCT which
          supports 16 channels.
@@ -279,6 +317,7 @@ config PWM_LPC18XX_SCT
 config PWM_LPC32XX
        tristate "LPC32XX PWM support"
        depends on ARCH_LPC32XX || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for LPC32XX. The LPC32XX SOC has two
          PWM controllers.
@@ -287,11 +326,13 @@ config PWM_LPC32XX
          will be called pwm-lpc32xx.
 
 config PWM_LPSS
+       depends on HAS_IOMEM
        tristate
 
 config PWM_LPSS_PCI
        tristate "Intel LPSS PWM PCI driver"
-       depends on X86 && PCI
+       depends on X86 || COMPILE_TEST
+       depends on HAS_IOMEM && PCI
        select PWM_LPSS
        help
          The PCI driver for Intel Low Power Subsystem PWM controller.
@@ -301,7 +342,8 @@ config PWM_LPSS_PCI
 
 config PWM_LPSS_PLATFORM
        tristate "Intel LPSS PWM platform driver"
-       depends on X86 && ACPI
+       depends on (X86 && ACPI) || COMPILE_TEST
+       depends on HAS_IOMEM
        select PWM_LPSS
        help
          The platform driver for Intel Low Power Subsystem PWM controller.
@@ -312,7 +354,7 @@ config PWM_LPSS_PLATFORM
 config PWM_MESON
        tristate "Amlogic Meson PWM driver"
        depends on ARCH_MESON || COMPILE_TEST
-       depends on COMMON_CLK
+       depends on COMMON_CLK && HAS_IOMEM
        help
          The platform driver for Amlogic Meson PWM controller.
 
@@ -333,6 +375,7 @@ config PWM_MTK_DISP
 config PWM_MEDIATEK
        tristate "MediaTek PWM support"
        depends on ARCH_MEDIATEK || RALINK || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for Mediatek ARM SoC.
 
@@ -341,8 +384,8 @@ config PWM_MEDIATEK
 
 config PWM_MXS
        tristate "Freescale MXS PWM support"
-       depends on OF
        depends on ARCH_MXS || COMPILE_TEST
+       depends on HAS_IOMEM && OF
        select STMP_DEVICE
        help
          Generic PWM framework driver for Freescale MXS.
@@ -373,6 +416,7 @@ config PWM_PCA9685
 config PWM_PXA
        tristate "PXA PWM support"
        depends on ARCH_PXA || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for PXA.
 
@@ -404,6 +448,7 @@ config PWM_RENESAS_TPU
 config PWM_ROCKCHIP
        tristate "Rockchip PWM support"
        depends on ARCH_ROCKCHIP || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for the PWM controller found on
          Rockchip SoCs.
@@ -411,6 +456,7 @@ config PWM_ROCKCHIP
 config PWM_SAMSUNG
        tristate "Samsung PWM support"
        depends on PLAT_SAMSUNG || ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for Samsung.
 
@@ -420,7 +466,7 @@ config PWM_SAMSUNG
 config PWM_SIFIVE
        tristate "SiFive PWM support"
        depends on OF
-       depends on COMMON_CLK
+       depends on COMMON_CLK && HAS_IOMEM
        depends on RISCV || COMPILE_TEST
        help
          Generic PWM framework driver for SiFive SoCs.
@@ -441,7 +487,7 @@ config PWM_SL28CPLD
 config PWM_SPEAR
        tristate "STMicroelectronics SPEAr PWM support"
        depends on PLAT_SPEAR || COMPILE_TEST
-       depends on OF
+       depends on HAS_IOMEM && OF
        help
          Generic PWM framework driver for the PWM controller on ST
          SPEAr SoCs.
@@ -463,7 +509,7 @@ config PWM_SPRD
 config PWM_STI
        tristate "STiH4xx PWM support"
        depends on ARCH_STI || COMPILE_TEST
-       depends on OF
+       depends on HAS_IOMEM && OF
        help
          Generic PWM framework driver for STiH4xx SoCs.
 
@@ -509,6 +555,7 @@ config PWM_SUN4I
 config PWM_TEGRA
        tristate "NVIDIA Tegra PWM support"
        depends on ARCH_TEGRA || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for the PWFM controller found on NVIDIA
          Tegra SoCs.
@@ -519,6 +566,7 @@ config PWM_TEGRA
 config PWM_TIECAP
        tristate "ECAP PWM support"
        depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          PWM driver support for the ECAP APWM controller found on TI SOCs
 
@@ -528,6 +576,7 @@ config PWM_TIECAP
 config PWM_TIEHRPWM
        tristate "EHRPWM PWM support"
        depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_K3 || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          PWM driver support for the EHRPWM controller found on TI SOCs
 
@@ -555,6 +604,7 @@ config PWM_TWL_LED
 config PWM_VT8500
        tristate "vt8500 PWM support"
        depends on ARCH_VT8500 || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for vt8500.
 
@@ -564,6 +614,7 @@ config PWM_VT8500
 config PWM_ZX
        tristate "ZTE ZX PWM support"
        depends on ARCH_ZX || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Generic PWM framework driver for ZTE ZX family SoCs.
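
Most of the Kconfig hunks above add the same "depends on HAS_IOMEM" line: these drivers are buildable under COMPILE_TEST, and each of them maps a register window and uses MMIO accessors, which are not available on architectures without HAS_IOMEM (UML being the usual offender). The probe pattern they all share is, roughly (hypothetical driver):

	static int foo_pwm_probe(struct platform_device *pdev)
	{
		void __iomem *base;

		base = devm_platform_ioremap_resource(pdev, 0); /* needs HAS_IOMEM */
		if (IS_ERR(base))
			return PTR_ERR(base);

		writel(0, base);	/* MMIO accessors */
		return 0;
	}
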
 
index cbdcd55..18b89d7 100644 (file)
@@ -13,6 +13,7 @@ obj-$(CONFIG_PWM_BRCMSTB)     += pwm-brcmstb.o
 obj-$(CONFIG_PWM_CLPS711X)     += pwm-clps711x.o
 obj-$(CONFIG_PWM_CRC)          += pwm-crc.o
 obj-$(CONFIG_PWM_CROS_EC)      += pwm-cros-ec.o
+obj-$(CONFIG_PWM_DWC)          += pwm-dwc.o
 obj-$(CONFIG_PWM_EP93XX)       += pwm-ep93xx.o
 obj-$(CONFIG_PWM_FSL_FTM)      += pwm-fsl-ftm.o
 obj-$(CONFIG_PWM_HIBVT)                += pwm-hibvt.o
@@ -20,8 +21,10 @@ obj-$(CONFIG_PWM_IMG)                += pwm-img.o
 obj-$(CONFIG_PWM_IMX1)         += pwm-imx1.o
 obj-$(CONFIG_PWM_IMX27)                += pwm-imx27.o
 obj-$(CONFIG_PWM_IMX_TPM)      += pwm-imx-tpm.o
+obj-$(CONFIG_PWM_INTEL_LGM)    += pwm-intel-lgm.o
 obj-$(CONFIG_PWM_IQS620A)      += pwm-iqs620a.o
 obj-$(CONFIG_PWM_JZ4740)       += pwm-jz4740.o
+obj-$(CONFIG_PWM_KEEMBAY)      += pwm-keembay.o
 obj-$(CONFIG_PWM_LP3943)       += pwm-lp3943.o
 obj-$(CONFIG_PWM_LPC18XX_SCT)  += pwm-lpc18xx-sct.o
 obj-$(CONFIG_PWM_LPC32XX)      += pwm-lpc32xx.o
index 1f16f53..a8eff4b 100644 (file)
@@ -1338,7 +1338,7 @@ DEFINE_SEQ_ATTRIBUTE(pwm_debugfs);
 
 static int __init pwm_debugfs_init(void)
 {
-       debugfs_create_file("pwm", S_IFREG | S_IRUGO, NULL, NULL,
+       debugfs_create_file("pwm", S_IFREG | 0444, NULL, NULL,
                            &pwm_debugfs_fops);
 
        return 0;
index fdf3964..58c6c0f 100644 (file)
@@ -101,12 +101,12 @@ static int ab8500_pwm_probe(struct platform_device *pdev)
 
        ab8500->chip.dev = &pdev->dev;
        ab8500->chip.ops = &ab8500_pwm_ops;
-       ab8500->chip.base = pdev->id;
+       ab8500->chip.base = -1;
        ab8500->chip.npwm = 1;
 
        err = pwmchip_add(&ab8500->chip);
        if (err < 0)
-               return err;
+               return dev_err_probe(&pdev->dev, err, "Failed to add pwm chip\n");
 
        dev_dbg(&pdev->dev, "pwm probe successful\n");
        platform_set_drvdata(pdev, ab8500);
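
dev_err_probe() keeps error handling to one line while staying quiet on probe deferral: it logs through dev_err() for real errors, records the deferral reason for -EPROBE_DEFER instead of printing it, and returns the error code so it can be used directly in a return statement. A behavioural sketch, not the kernel's exact implementation:

	int dev_err_probe_sketch(struct device *dev, int err, const char *msg)
	{
		if (err != -EPROBE_DEFER)
			dev_err(dev, "error %d: %s", err, msg);
		else
			dev_dbg(dev, "deferred: %s", msg);

		return err;
	}

The same conversion shows up again in pwm-imx-tpm further down.
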
index 85c5370..5ccc3e7 100644 (file)
 #include <linux/err.h>
 #include <linux/ioport.h>
 #include <linux/io.h>
+#include <linux/mfd/syscon.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/regmap.h>
 #include <linux/slab.h>
 #include <soc/at91/atmel_tcb.h>
 
-#define NPWM   6
+#define NPWM   2
 
 #define ATMEL_TC_ACMR_MASK     (ATMEL_TC_ACPA | ATMEL_TC_ACPC |        \
                                 ATMEL_TC_AEEVT | ATMEL_TC_ASWTRG)
@@ -48,11 +51,18 @@ struct atmel_tcb_channel {
 struct atmel_tcb_pwm_chip {
        struct pwm_chip chip;
        spinlock_t lock;
-       struct atmel_tc *tc;
+       u8 channel;
+       u8 width;
+       struct regmap *regmap;
+       struct clk *clk;
+       struct clk *gclk;
+       struct clk *slow_clk;
        struct atmel_tcb_pwm_device *pwms[NPWM];
-       struct atmel_tcb_channel bkup[NPWM / 2];
+       struct atmel_tcb_channel bkup;
 };
 
+static const u8 atmel_tcb_divisors[] = { 2, 8, 32, 128, 0, };
+
 static inline struct atmel_tcb_pwm_chip *to_tcb_chip(struct pwm_chip *chip)
 {
        return container_of(chip, struct atmel_tcb_pwm_chip, chip);
@@ -74,10 +84,6 @@ static int atmel_tcb_pwm_request(struct pwm_chip *chip,
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
        struct atmel_tcb_pwm_device *tcbpwm;
-       struct atmel_tc *tc = tcbpwmc->tc;
-       void __iomem *regs = tc->regs;
-       unsigned group = pwm->hwpwm / 2;
-       unsigned index = pwm->hwpwm % 2;
        unsigned cmr;
        int ret;
 
@@ -85,7 +91,7 @@ static int atmel_tcb_pwm_request(struct pwm_chip *chip,
        if (!tcbpwm)
                return -ENOMEM;
 
-       ret = clk_prepare_enable(tc->clk[group]);
+       ret = clk_prepare_enable(tcbpwmc->clk);
        if (ret) {
                devm_kfree(chip->dev, tcbpwm);
                return ret;
@@ -98,28 +104,31 @@ static int atmel_tcb_pwm_request(struct pwm_chip *chip,
        tcbpwm->div = 0;
 
        spin_lock(&tcbpwmc->lock);
-       cmr = __raw_readl(regs + ATMEL_TC_REG(group, CMR));
+       regmap_read(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, CMR), &cmr);
        /*
         * Get init config from Timer Counter registers if
         * Timer Counter is already configured as a PWM generator.
         */
        if (cmr & ATMEL_TC_WAVE) {
-               if (index == 0)
-                       tcbpwm->duty =
-                               __raw_readl(regs + ATMEL_TC_REG(group, RA));
+               if (pwm->hwpwm == 0)
+                       regmap_read(tcbpwmc->regmap,
+                                   ATMEL_TC_REG(tcbpwmc->channel, RA),
+                                   &tcbpwm->duty);
                else
-                       tcbpwm->duty =
-                               __raw_readl(regs + ATMEL_TC_REG(group, RB));
+                       regmap_read(tcbpwmc->regmap,
+                                   ATMEL_TC_REG(tcbpwmc->channel, RB),
+                                   &tcbpwm->duty);
 
                tcbpwm->div = cmr & ATMEL_TC_TCCLKS;
-               tcbpwm->period = __raw_readl(regs + ATMEL_TC_REG(group, RC));
+               regmap_read(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, RC),
+                           &tcbpwm->period);
                cmr &= (ATMEL_TC_TCCLKS | ATMEL_TC_ACMR_MASK |
                        ATMEL_TC_BCMR_MASK);
        } else
                cmr = 0;
 
        cmr |= ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO | ATMEL_TC_EEVT_XC0;
-       __raw_writel(cmr, regs + ATMEL_TC_REG(group, CMR));
+       regmap_write(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, CMR), cmr);
        spin_unlock(&tcbpwmc->lock);
 
        tcbpwmc->pwms[pwm->hwpwm] = tcbpwm;
@@ -131,9 +140,8 @@ static void atmel_tcb_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
        struct atmel_tcb_pwm_device *tcbpwm = pwm_get_chip_data(pwm);
-       struct atmel_tc *tc = tcbpwmc->tc;
 
-       clk_disable_unprepare(tc->clk[pwm->hwpwm / 2]);
+       clk_disable_unprepare(tcbpwmc->clk);
        tcbpwmc->pwms[pwm->hwpwm] = NULL;
        devm_kfree(chip->dev, tcbpwm);
 }
@@ -142,10 +150,6 @@ static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
        struct atmel_tcb_pwm_device *tcbpwm = pwm_get_chip_data(pwm);
-       struct atmel_tc *tc = tcbpwmc->tc;
-       void __iomem *regs = tc->regs;
-       unsigned group = pwm->hwpwm / 2;
-       unsigned index = pwm->hwpwm % 2;
        unsigned cmr;
        enum pwm_polarity polarity = tcbpwm->polarity;
 
@@ -161,10 +165,10 @@ static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
                polarity = !polarity;
 
        spin_lock(&tcbpwmc->lock);
-       cmr = __raw_readl(regs + ATMEL_TC_REG(group, CMR));
+       regmap_read(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, CMR), &cmr);
 
        /* flush old setting and set the new one */
-       if (index == 0) {
+       if (pwm->hwpwm == 0) {
                cmr &= ~ATMEL_TC_ACMR_MASK;
                if (polarity == PWM_POLARITY_INVERSED)
                        cmr |= ATMEL_TC_ASWTRG_CLEAR;
@@ -178,20 +182,22 @@ static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
                        cmr |= ATMEL_TC_BSWTRG_SET;
        }
 
-       __raw_writel(cmr, regs + ATMEL_TC_REG(group, CMR));
+       regmap_write(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, CMR), cmr);
 
        /*
         * Use software trigger to apply the new setting.
         * If both PWM devices in this group are disabled we stop the clock.
         */
        if (!(cmr & (ATMEL_TC_ACPC | ATMEL_TC_BCPC))) {
-               __raw_writel(ATMEL_TC_SWTRG | ATMEL_TC_CLKDIS,
-                            regs + ATMEL_TC_REG(group, CCR));
-               tcbpwmc->bkup[group].enabled = 1;
+               regmap_write(tcbpwmc->regmap,
+                            ATMEL_TC_REG(tcbpwmc->channel, CCR),
+                            ATMEL_TC_SWTRG | ATMEL_TC_CLKDIS);
+               tcbpwmc->bkup.enabled = 1;
        } else {
-               __raw_writel(ATMEL_TC_SWTRG, regs +
-                            ATMEL_TC_REG(group, CCR));
-               tcbpwmc->bkup[group].enabled = 0;
+               regmap_write(tcbpwmc->regmap,
+                            ATMEL_TC_REG(tcbpwmc->channel, CCR),
+                            ATMEL_TC_SWTRG);
+               tcbpwmc->bkup.enabled = 0;
        }
 
        spin_unlock(&tcbpwmc->lock);
@@ -201,10 +207,6 @@ static int atmel_tcb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
        struct atmel_tcb_pwm_device *tcbpwm = pwm_get_chip_data(pwm);
-       struct atmel_tc *tc = tcbpwmc->tc;
-       void __iomem *regs = tc->regs;
-       unsigned group = pwm->hwpwm / 2;
-       unsigned index = pwm->hwpwm % 2;
        u32 cmr;
        enum pwm_polarity polarity = tcbpwm->polarity;
 
@@ -220,12 +222,12 @@ static int atmel_tcb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
                polarity = !polarity;
 
        spin_lock(&tcbpwmc->lock);
-       cmr = __raw_readl(regs + ATMEL_TC_REG(group, CMR));
+       regmap_read(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, CMR), &cmr);
 
        /* flush old setting and set the new one */
        cmr &= ~ATMEL_TC_TCCLKS;
 
-       if (index == 0) {
+       if (pwm->hwpwm == 0) {
                cmr &= ~ATMEL_TC_ACMR_MASK;
 
                /* Set CMR flags according to given polarity */
@@ -248,7 +250,7 @@ static int atmel_tcb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
         * this config till next config call.
         */
        if (tcbpwm->duty != tcbpwm->period && tcbpwm->duty > 0) {
-               if (index == 0) {
+               if (pwm->hwpwm == 0) {
                        if (polarity == PWM_POLARITY_INVERSED)
                                cmr |= ATMEL_TC_ACPA_SET | ATMEL_TC_ACPC_CLEAR;
                        else
@@ -263,19 +265,24 @@ static int atmel_tcb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 
        cmr |= (tcbpwm->div & ATMEL_TC_TCCLKS);
 
-       __raw_writel(cmr, regs + ATMEL_TC_REG(group, CMR));
+       regmap_write(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, CMR), cmr);
 
-       if (index == 0)
-               __raw_writel(tcbpwm->duty, regs + ATMEL_TC_REG(group, RA));
+       if (pwm->hwpwm == 0)
+               regmap_write(tcbpwmc->regmap,
+                            ATMEL_TC_REG(tcbpwmc->channel, RA),
+                            tcbpwm->duty);
        else
-               __raw_writel(tcbpwm->duty, regs + ATMEL_TC_REG(group, RB));
+               regmap_write(tcbpwmc->regmap,
+                            ATMEL_TC_REG(tcbpwmc->channel, RB),
+                            tcbpwm->duty);
 
-       __raw_writel(tcbpwm->period, regs + ATMEL_TC_REG(group, RC));
+       regmap_write(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, RC),
+                    tcbpwm->period);
 
        /* Use software trigger to apply the new setting */
-       __raw_writel(ATMEL_TC_CLKEN | ATMEL_TC_SWTRG,
-                    regs + ATMEL_TC_REG(group, CCR));
-       tcbpwmc->bkup[group].enabled = 1;
+       regmap_write(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, CCR),
+                    ATMEL_TC_SWTRG | ATMEL_TC_CLKEN);
+       tcbpwmc->bkup.enabled = 1;
        spin_unlock(&tcbpwmc->lock);
        return 0;
 }
@@ -285,29 +292,29 @@ static int atmel_tcb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
        struct atmel_tcb_pwm_device *tcbpwm = pwm_get_chip_data(pwm);
-       unsigned group = pwm->hwpwm / 2;
-       unsigned index = pwm->hwpwm % 2;
        struct atmel_tcb_pwm_device *atcbpwm = NULL;
-       struct atmel_tc *tc = tcbpwmc->tc;
-       int i;
+       int i = 0;
        int slowclk = 0;
        unsigned period;
        unsigned duty;
-       unsigned rate = clk_get_rate(tc->clk[group]);
+       unsigned rate = clk_get_rate(tcbpwmc->clk);
        unsigned long long min;
        unsigned long long max;
 
        /*
         * Find best clk divisor:
         * the smallest divisor which can fulfill the period_ns requirements.
+        * If there is a gclk, the first divisor is actually the gclk selector.
         */
-       for (i = 0; i < 5; ++i) {
-               if (atmel_tc_divisors[i] == 0) {
+       if (tcbpwmc->gclk)
+               i = 1;
+       for (; i < ARRAY_SIZE(atmel_tcb_divisors); ++i) {
+               if (atmel_tcb_divisors[i] == 0) {
                        slowclk = i;
                        continue;
                }
-               min = div_u64((u64)NSEC_PER_SEC * atmel_tc_divisors[i], rate);
-               max = min << tc->tcb_config->counter_width;
+               min = div_u64((u64)NSEC_PER_SEC * atmel_tcb_divisors[i], rate);
+               max = min << tcbpwmc->width;
                if (max >= period_ns)
                        break;
        }
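
A worked example for the divisor search above: with a 32-bit counter and, say, a 66 MHz timer clock (illustrative), divisor 2 gives min = 10^9 * 2 / 66000000 ≈ 30 ns and max = 30 ns << 32 ≈ 130 s, so almost any request fits the first usable divisor; the loop only climbs to 8, 32 or 128 when period_ns exceeds the previous max. The 0 entry in atmel_tcb_divisors marks the 32 kHz slow-clock slot, remembered in slowclk and used as the fallback below when even divisor 128 is too fast. When a gclk is present, index 0 acts as the gclk selector rather than a divisor, so the search starts at 1.
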
@@ -316,11 +323,11 @@ static int atmel_tcb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
         * If none of the divisor are small enough to represent period_ns
         * take slow clock (32KHz).
         */
-       if (i == 5) {
+       if (i == ARRAY_SIZE(atmel_tcb_divisors)) {
                i = slowclk;
-               rate = clk_get_rate(tc->slow_clk);
+               rate = clk_get_rate(tcbpwmc->slow_clk);
                min = div_u64(NSEC_PER_SEC, rate);
-               max = min << tc->tcb_config->counter_width;
+               max = min << tcbpwmc->width;
 
                /* If period is too big return ERANGE error */
                if (max < period_ns)
@@ -330,17 +337,13 @@ static int atmel_tcb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        duty = div_u64(duty_ns, min);
        period = div_u64(period_ns, min);
 
-       if (index == 0)
-               atcbpwm = tcbpwmc->pwms[pwm->hwpwm + 1];
+       if (pwm->hwpwm == 0)
+               atcbpwm = tcbpwmc->pwms[1];
        else
-               atcbpwm = tcbpwmc->pwms[pwm->hwpwm - 1];
+               atcbpwm = tcbpwmc->pwms[0];
 
        /*
-        * PWM devices provided by TCB driver are grouped by 2:
-        * - group 0: PWM 0 & 1
-        * - group 1: PWM 2 & 3
-        * - group 2: PWM 4 & 5
-        *
+        * PWM devices provided by the TCB driver are grouped by 2.
         * PWM devices in a given group must be configured with the
         * same period_ns.
         *
@@ -376,32 +379,75 @@ static const struct pwm_ops atmel_tcb_pwm_ops = {
        .owner = THIS_MODULE,
 };
 
+static struct atmel_tcb_config tcb_rm9200_config = {
+       .counter_width = 16,
+};
+
+static struct atmel_tcb_config tcb_sam9x5_config = {
+       .counter_width = 32,
+};
+
+static struct atmel_tcb_config tcb_sama5d2_config = {
+       .counter_width = 32,
+       .has_gclk = 1,
+};
+
+static const struct of_device_id atmel_tcb_of_match[] = {
+       { .compatible = "atmel,at91rm9200-tcb", .data = &tcb_rm9200_config, },
+       { .compatible = "atmel,at91sam9x5-tcb", .data = &tcb_sam9x5_config, },
+       { .compatible = "atmel,sama5d2-tcb", .data = &tcb_sama5d2_config, },
+       { /* sentinel */ }
+};
+
 static int atmel_tcb_pwm_probe(struct platform_device *pdev)
 {
+       const struct of_device_id *match;
        struct atmel_tcb_pwm_chip *tcbpwm;
+       const struct atmel_tcb_config *config;
        struct device_node *np = pdev->dev.of_node;
-       struct atmel_tc *tc;
+       struct regmap *regmap;
+       struct clk *clk, *gclk = NULL;
+       struct clk *slow_clk;
+       char clk_name[] = "t0_clk";
        int err;
-       int tcblock;
+       int channel;
 
-       err = of_property_read_u32(np, "tc-block", &tcblock);
+       err = of_property_read_u32(np, "reg", &channel);
        if (err < 0) {
                dev_err(&pdev->dev,
-                       "failed to get Timer Counter Block number from device tree (error: %d)\n",
+                       "failed to get Timer Counter Block channel from device tree (error: %d)\n",
                        err);
                return err;
        }
 
-       tc = atmel_tc_alloc(tcblock);
-       if (tc == NULL) {
-               dev_err(&pdev->dev, "failed to allocate Timer Counter Block\n");
-               return -ENOMEM;
+       regmap = syscon_node_to_regmap(np->parent);
+       if (IS_ERR(regmap))
+               return PTR_ERR(regmap);
+
+       slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
+       if (IS_ERR(slow_clk))
+               return PTR_ERR(slow_clk);
+
+       clk_name[1] += channel;
+       clk = of_clk_get_by_name(np->parent, clk_name);
+       if (IS_ERR(clk))
+               clk = of_clk_get_by_name(np->parent, "t0_clk");
+       if (IS_ERR(clk))
+               return PTR_ERR(clk);
+
+       match = of_match_node(atmel_tcb_of_match, np->parent);
+       config = match->data;
+
+       if (config->has_gclk) {
+               gclk = of_clk_get_by_name(np->parent, "gclk");
+               if (IS_ERR(gclk))
+                       return PTR_ERR(gclk);
        }
 
        tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
        if (tcbpwm == NULL) {
                err = -ENOMEM;
-               goto err_free_tc;
+               goto err_slow_clk;
        }
 
        tcbpwm->chip.dev = &pdev->dev;
@@ -410,11 +456,16 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
        tcbpwm->chip.of_pwm_n_cells = 3;
        tcbpwm->chip.base = -1;
        tcbpwm->chip.npwm = NPWM;
-       tcbpwm->tc = tc;
-
-       err = clk_prepare_enable(tc->slow_clk);
+       tcbpwm->channel = channel;
+       tcbpwm->regmap = regmap;
+       tcbpwm->clk = clk;
+       tcbpwm->gclk = gclk;
+       tcbpwm->slow_clk = slow_clk;
+       tcbpwm->width = config->counter_width;
+
+       err = clk_prepare_enable(slow_clk);
        if (err)
-               goto err_free_tc;
+               goto err_slow_clk;
 
        spin_lock_init(&tcbpwm->lock);
 
@@ -427,10 +478,10 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
        return 0;
 
 err_disable_clk:
-       clk_disable_unprepare(tcbpwm->tc->slow_clk);
+       clk_disable_unprepare(tcbpwm->slow_clk);
 
-err_free_tc:
-       atmel_tc_free(tc);
+err_slow_clk:
+       clk_put(slow_clk);
 
        return err;
 }
@@ -440,14 +491,14 @@ static int atmel_tcb_pwm_remove(struct platform_device *pdev)
        struct atmel_tcb_pwm_chip *tcbpwm = platform_get_drvdata(pdev);
        int err;
 
-       clk_disable_unprepare(tcbpwm->tc->slow_clk);
+       clk_disable_unprepare(tcbpwm->slow_clk);
+       clk_put(tcbpwm->slow_clk);
+       clk_put(tcbpwm->clk);
 
        err = pwmchip_remove(&tcbpwm->chip);
        if (err < 0)
                return err;
 
-       atmel_tc_free(tcbpwm->tc);
-
        return 0;
 }
 
@@ -461,38 +512,33 @@ MODULE_DEVICE_TABLE(of, atmel_tcb_pwm_dt_ids);
 static int atmel_tcb_pwm_suspend(struct device *dev)
 {
        struct atmel_tcb_pwm_chip *tcbpwm = dev_get_drvdata(dev);
-       void __iomem *base = tcbpwm->tc->regs;
-       int i;
+       struct atmel_tcb_channel *chan = &tcbpwm->bkup;
+       unsigned int channel = tcbpwm->channel;
 
-       for (i = 0; i < (NPWM / 2); i++) {
-               struct atmel_tcb_channel *chan = &tcbpwm->bkup[i];
+       regmap_read(tcbpwm->regmap, ATMEL_TC_REG(channel, CMR), &chan->cmr);
+       regmap_read(tcbpwm->regmap, ATMEL_TC_REG(channel, RA), &chan->ra);
+       regmap_read(tcbpwm->regmap, ATMEL_TC_REG(channel, RB), &chan->rb);
+       regmap_read(tcbpwm->regmap, ATMEL_TC_REG(channel, RC), &chan->rc);
 
-               chan->cmr = readl(base + ATMEL_TC_REG(i, CMR));
-               chan->ra = readl(base + ATMEL_TC_REG(i, RA));
-               chan->rb = readl(base + ATMEL_TC_REG(i, RB));
-               chan->rc = readl(base + ATMEL_TC_REG(i, RC));
-       }
        return 0;
 }
 
 static int atmel_tcb_pwm_resume(struct device *dev)
 {
        struct atmel_tcb_pwm_chip *tcbpwm = dev_get_drvdata(dev);
-       void __iomem *base = tcbpwm->tc->regs;
-       int i;
-
-       for (i = 0; i < (NPWM / 2); i++) {
-               struct atmel_tcb_channel *chan = &tcbpwm->bkup[i];
-
-               writel(chan->cmr, base + ATMEL_TC_REG(i, CMR));
-               writel(chan->ra, base + ATMEL_TC_REG(i, RA));
-               writel(chan->rb, base + ATMEL_TC_REG(i, RB));
-               writel(chan->rc, base + ATMEL_TC_REG(i, RC));
-               if (chan->enabled) {
-                       writel(ATMEL_TC_CLKEN | ATMEL_TC_SWTRG,
-                               base + ATMEL_TC_REG(i, CCR));
-               }
-       }
+       struct atmel_tcb_channel *chan = &tcbpwm->bkup;
+       unsigned int channel = tcbpwm->channel;
+
+       regmap_write(tcbpwm->regmap, ATMEL_TC_REG(channel, CMR), chan->cmr);
+       regmap_write(tcbpwm->regmap, ATMEL_TC_REG(channel, RA), chan->ra);
+       regmap_write(tcbpwm->regmap, ATMEL_TC_REG(channel, RB), chan->rb);
+       regmap_write(tcbpwm->regmap, ATMEL_TC_REG(channel, RC), chan->rc);
+
+       if (chan->enabled)
+               regmap_write(tcbpwm->regmap,
+                            ATMEL_TC_REG(channel, CCR),
+                            ATMEL_TC_CLKEN | ATMEL_TC_SWTRG);
+
        return 0;
 }
 #endif
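
Under the new per-channel binding used above, the driver no longer allocates a whole TC block: the channel index comes from the node's reg property, the register window from the parent syscon regmap, and the per-channel clock by name, falling back to the shared t0_clk when the DT describes only one clock. The clock-name lookup leans on single-digit channel numbers:

	char clk_name[] = "t0_clk";

	clk_name[1] += channel;		/* channel 2 -> "t2_clk", valid for 0..9 */
	clk = of_clk_get_by_name(np->parent, clk_name);
	if (IS_ERR(clk))		/* fall back to the shared clock */
		clk = of_clk_get_by_name(np->parent, "t0_clk");
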
index 6161e7e..5813339 100644 (file)
@@ -401,7 +401,6 @@ MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids);
 static int atmel_pwm_probe(struct platform_device *pdev)
 {
        struct atmel_pwm_chip *atmel_pwm;
-       struct resource *res;
        int ret;
 
        atmel_pwm = devm_kzalloc(&pdev->dev, sizeof(*atmel_pwm), GFP_KERNEL);
@@ -412,8 +411,7 @@ static int atmel_pwm_probe(struct platform_device *pdev)
        atmel_pwm->data = of_device_get_match_data(&pdev->dev);
        atmel_pwm->updated_pwms = 0;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       atmel_pwm->base = devm_ioremap_resource(&pdev->dev, res);
+       atmel_pwm->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(atmel_pwm->base))
                return PTR_ERR(atmel_pwm->base);
 
index 79b1e58..f4853c4 100644 (file)
@@ -197,7 +197,6 @@ static const struct pwm_ops iproc_pwm_ops = {
 static int iproc_pwmc_probe(struct platform_device *pdev)
 {
        struct iproc_pwmc *ip;
-       struct resource *res;
        unsigned int i;
        u32 value;
        int ret;
@@ -215,8 +214,7 @@ static int iproc_pwmc_probe(struct platform_device *pdev)
        ip->chip.of_xlate = of_pwm_xlate_with_flags;
        ip->chip.of_pwm_n_cells = 3;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       ip->base = devm_ioremap_resource(&pdev->dev, res);
+       ip->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(ip->base))
                return PTR_ERR(ip->base);
 
index 16c5898..578b362 100644 (file)
@@ -259,7 +259,6 @@ static const struct pwm_ops kona_pwm_ops = {
 static int kona_pwmc_probe(struct platform_device *pdev)
 {
        struct kona_pwmc *kp;
-       struct resource *res;
        unsigned int chan;
        unsigned int value = 0;
        int ret = 0;
@@ -277,8 +276,7 @@ static int kona_pwmc_probe(struct platform_device *pdev)
        kp->chip.of_xlate = of_pwm_xlate_with_flags;
        kp->chip.of_pwm_n_cells = 3;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       kp->base = devm_ioremap_resource(&pdev->dev, res);
+       kp->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(kp->base))
                return PTR_ERR(kp->base);
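
This and the neighbouring conversions collapse the repeated platform_get_resource() plus devm_ioremap_resource() pair into devm_platform_ioremap_resource(), whose implementation is essentially the code it replaces (drivers/base/platform.c):

	void __iomem *devm_platform_ioremap_resource(struct platform_device *pdev,
						     unsigned int index)
	{
		struct resource *res;

		res = platform_get_resource(pdev, IORESOURCE_MEM, index);
		return devm_ioremap_resource(&pdev->dev, res);
	}
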
 
index 6841dcf..6ff5f04 100644 (file)
@@ -58,13 +58,15 @@ static void bcm2835_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
        writel(value, pc->base + PWM_CONTROL);
 }
 
-static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-                             int duty_ns, int period_ns)
+static int bcm2835_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+                            const struct pwm_state *state)
 {
        struct bcm2835_pwm *pc = to_bcm2835_pwm(chip);
        unsigned long rate = clk_get_rate(pc->clk);
+       unsigned long long period;
        unsigned long scaler;
-       u32 period;
+       u32 val;
 
        if (!rate) {
                dev_err(pc->dev, "failed to get clock rate\n");
@@ -72,54 +74,34 @@ static int bcm2835_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
        scaler = DIV_ROUND_CLOSEST(NSEC_PER_SEC, rate);
-       period = DIV_ROUND_CLOSEST(period_ns, scaler);
+       /* set period */
+       period = DIV_ROUND_CLOSEST_ULL(state->period, scaler);
 
-       if (period < PERIOD_MIN)
+       /* don't accept a period that is too small or has been truncated */
+       if ((period < PERIOD_MIN) || (period > U32_MAX))
                return -EINVAL;
 
-       writel(DIV_ROUND_CLOSEST(duty_ns, scaler),
-              pc->base + DUTY(pwm->hwpwm));
        writel(period, pc->base + PERIOD(pwm->hwpwm));
 
-       return 0;
-}
-
-static int bcm2835_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-       struct bcm2835_pwm *pc = to_bcm2835_pwm(chip);
-       u32 value;
-
-       value = readl(pc->base + PWM_CONTROL);
-       value |= PWM_ENABLE << PWM_CONTROL_SHIFT(pwm->hwpwm);
-       writel(value, pc->base + PWM_CONTROL);
+       /* set duty cycle */
+       val = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, scaler);
+       writel(val, pc->base + DUTY(pwm->hwpwm));
 
-       return 0;
-}
+       /* set polarity */
+       val = readl(pc->base + PWM_CONTROL);
 
-static void bcm2835_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
-       struct bcm2835_pwm *pc = to_bcm2835_pwm(chip);
-       u32 value;
-
-       value = readl(pc->base + PWM_CONTROL);
-       value &= ~(PWM_ENABLE << PWM_CONTROL_SHIFT(pwm->hwpwm));
-       writel(value, pc->base + PWM_CONTROL);
-}
-
-static int bcm2835_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm,
-                               enum pwm_polarity polarity)
-{
-       struct bcm2835_pwm *pc = to_bcm2835_pwm(chip);
-       u32 value;
-
-       value = readl(pc->base + PWM_CONTROL);
+       if (state->polarity == PWM_POLARITY_NORMAL)
+               val &= ~(PWM_POLARITY << PWM_CONTROL_SHIFT(pwm->hwpwm));
+       else
+               val |= PWM_POLARITY << PWM_CONTROL_SHIFT(pwm->hwpwm);
 
-       if (polarity == PWM_POLARITY_NORMAL)
-               value &= ~(PWM_POLARITY << PWM_CONTROL_SHIFT(pwm->hwpwm));
+       /* enable/disable */
+       if (state->enabled)
+               val |= PWM_ENABLE << PWM_CONTROL_SHIFT(pwm->hwpwm);
        else
-               value |= PWM_POLARITY << PWM_CONTROL_SHIFT(pwm->hwpwm);
+               val &= ~(PWM_ENABLE << PWM_CONTROL_SHIFT(pwm->hwpwm));
 
-       writel(value, pc->base + PWM_CONTROL);
+       writel(val, pc->base + PWM_CONTROL);
 
        return 0;
 }
@@ -127,17 +109,13 @@ static int bcm2835_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm,
 static const struct pwm_ops bcm2835_pwm_ops = {
        .request = bcm2835_pwm_request,
        .free = bcm2835_pwm_free,
-       .config = bcm2835_pwm_config,
-       .enable = bcm2835_pwm_enable,
-       .disable = bcm2835_pwm_disable,
-       .set_polarity = bcm2835_set_polarity,
+       .apply = bcm2835_pwm_apply,
        .owner = THIS_MODULE,
 };
 
 static int bcm2835_pwm_probe(struct platform_device *pdev)
 {
        struct bcm2835_pwm *pc;
-       struct resource *res;
        int ret;
 
        pc = devm_kzalloc(&pdev->dev, sizeof(*pc), GFP_KERNEL);
@@ -146,8 +124,7 @@ static int bcm2835_pwm_probe(struct platform_device *pdev)
 
        pc->dev = &pdev->dev;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pc->base = devm_ioremap_resource(&pdev->dev, res);
+       pc->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pc->base))
                return PTR_ERR(pc->base);
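
With the legacy .config/.enable/.disable/.set_polarity callbacks folded into a single .apply hook above, the driver receives the whole requested state atomically and writes it out in one pass. On the consumer side the same state API looks like this; the values are illustrative:

	struct pwm_state state;
	int ret;

	pwm_init_state(pwm, &state);	/* seed period/polarity from the PWM's args */
	state.period = 1000000;		/* 1 ms */
	state.duty_cycle = 250000;	/* 25 % */
	state.enabled = true;

	ret = pwm_apply_state(pwm, &state);
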
 
index b91c477..fe40528 100644 (file)
@@ -186,15 +186,13 @@ MODULE_DEVICE_TABLE(of, berlin_pwm_match);
 static int berlin_pwm_probe(struct platform_device *pdev)
 {
        struct berlin_pwm_chip *pwm;
-       struct resource *res;
        int ret;
 
        pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL);
        if (!pwm)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pwm->base = devm_ioremap_resource(&pdev->dev, res);
+       pwm->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pwm->base))
                return PTR_ERR(pwm->base);
 
index fea612c..8b66f9d 100644 (file)
@@ -234,7 +234,6 @@ MODULE_DEVICE_TABLE(of, brcmstb_pwm_of_match);
 static int brcmstb_pwm_probe(struct platform_device *pdev)
 {
        struct brcmstb_pwm *p;
-       struct resource *res;
        int ret;
 
        p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
@@ -262,8 +261,7 @@ static int brcmstb_pwm_probe(struct platform_device *pdev)
        p->chip.base = -1;
        p->chip.npwm = 2;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       p->base = devm_ioremap_resource(&pdev->dev, res);
+       p->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(p->base)) {
                ret = PTR_ERR(p->base);
                goto out_clk;
index ba9500a..cb1af86 100644 (file)
@@ -113,14 +113,12 @@ static struct pwm_device *clps711x_pwm_xlate(struct pwm_chip *chip,
 static int clps711x_pwm_probe(struct platform_device *pdev)
 {
        struct clps711x_chip *priv;
-       struct resource *res;
 
        priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->pmpcon = devm_ioremap_resource(&pdev->dev, res);
+       priv->pmpcon = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->pmpcon))
                return PTR_ERR(priv->pmpcon);
 
index ecfdfac..1e22768 100644 (file)
@@ -64,7 +64,7 @@ static int crc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
        if (state->polarity != PWM_POLARITY_NORMAL)
-               return -EOPNOTSUPP;
+               return -EINVAL;
 
        if (pwm_is_enabled(pwm) && !state->enabled) {
                err = regmap_write(crc_pwm->regmap, BACKLIGHT_EN, 0);
diff --git a/drivers/pwm/pwm-dwc.c b/drivers/pwm/pwm-dwc.c
new file mode 100644 (file)
index 0000000..f6c98e0
--- /dev/null
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DesignWare PWM Controller driver
+ *
+ * Copyright (C) 2018-2020 Intel Corporation
+ *
+ * Author: Felipe Balbi (Intel)
+ * Author: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+ * Author: Raymond Tan <raymond.tan@intel.com>
+ *
+ * Limitations:
+ * - The hardware cannot generate a 0 % or 100 % duty cycle. Both high and low
+ *   periods are one or more input clock periods long.
+ */
+
+#include <linux/bitops.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/pwm.h>
+
+#define DWC_TIM_LD_CNT(n)      ((n) * 0x14)
+#define DWC_TIM_LD_CNT2(n)     (((n) * 4) + 0xb0)
+#define DWC_TIM_CUR_VAL(n)     (((n) * 0x14) + 0x04)
+#define DWC_TIM_CTRL(n)                (((n) * 0x14) + 0x08)
+#define DWC_TIM_EOI(n)         (((n) * 0x14) + 0x0c)
+#define DWC_TIM_INT_STS(n)     (((n) * 0x14) + 0x10)
+
+#define DWC_TIMERS_INT_STS     0xa0
+#define DWC_TIMERS_EOI         0xa4
+#define DWC_TIMERS_RAW_INT_STS 0xa8
+#define DWC_TIMERS_COMP_VERSION        0xac
+
+#define DWC_TIMERS_TOTAL       8
+#define DWC_CLK_PERIOD_NS      10
+
+/* Timer Control Register */
+#define DWC_TIM_CTRL_EN                BIT(0)
+#define DWC_TIM_CTRL_MODE      BIT(1)
+#define DWC_TIM_CTRL_MODE_FREE (0 << 1)
+#define DWC_TIM_CTRL_MODE_USER (1 << 1)
+#define DWC_TIM_CTRL_INT_MASK  BIT(2)
+#define DWC_TIM_CTRL_PWM       BIT(3)
+
+struct dwc_pwm_ctx {
+       u32 cnt;
+       u32 cnt2;
+       u32 ctrl;
+};
+
+struct dwc_pwm {
+       struct pwm_chip chip;
+       void __iomem *base;
+       struct dwc_pwm_ctx ctx[DWC_TIMERS_TOTAL];
+};
+#define to_dwc_pwm(p)  (container_of((p), struct dwc_pwm, chip))
+
+static inline u32 dwc_pwm_readl(struct dwc_pwm *dwc, u32 offset)
+{
+       return readl(dwc->base + offset);
+}
+
+static inline void dwc_pwm_writel(struct dwc_pwm *dwc, u32 value, u32 offset)
+{
+       writel(value, dwc->base + offset);
+}
+
+static void __dwc_pwm_set_enable(struct dwc_pwm *dwc, int pwm, int enabled)
+{
+       u32 reg;
+
+       reg = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm));
+
+       if (enabled)
+               reg |= DWC_TIM_CTRL_EN;
+       else
+               reg &= ~DWC_TIM_CTRL_EN;
+
+       dwc_pwm_writel(dwc, reg, DWC_TIM_CTRL(pwm));
+}
+
+static int __dwc_pwm_configure_timer(struct dwc_pwm *dwc,
+                                    struct pwm_device *pwm,
+                                    const struct pwm_state *state)
+{
+       u64 tmp;
+       u32 ctrl;
+       u32 high;
+       u32 low;
+
+       /*
+        * Calculate the width of the low and high periods in terms of input
+        * clock periods and check that the results are within the HW limits
+        * of 1 to 2^32 periods.
+        */
+       tmp = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, DWC_CLK_PERIOD_NS);
+       if (tmp < 1 || tmp > (1ULL << 32))
+               return -ERANGE;
+       low = tmp - 1;
+
+       tmp = DIV_ROUND_CLOSEST_ULL(state->period - state->duty_cycle,
+                                   DWC_CLK_PERIOD_NS);
+       if (tmp < 1 || tmp > (1ULL << 32))
+               return -ERANGE;
+       high = tmp - 1;
+
+       /*
+        * The specification says the timer usage flow is to disable the
+        * timer, program it and then enable it. It also says the Load Count
+        * is loaded into the timer after it is enabled - either after a
+        * disable or a reset. Based on measurements it also happens without
+        * a disable whenever the Load Count is updated. But follow the
+        * specification.
+        */
+       __dwc_pwm_set_enable(dwc, pwm->hwpwm, false);
+
+       /*
+        * Write the Load Count and Load Count 2 registers. The former
+        * defines the width of the low period and the latter the width of
+        * the high period, each as a multiple of input clock periods:
+        * Width = ((Count + 1) * input clock period).
+        */
+       dwc_pwm_writel(dwc, low, DWC_TIM_LD_CNT(pwm->hwpwm));
+       dwc_pwm_writel(dwc, high, DWC_TIM_LD_CNT2(pwm->hwpwm));
+
+       /*
+        * Set user-defined mode: the timer reloads from the Load Count
+        * registers when it counts down to 0.
+        * Set PWM mode: it makes the output toggle, with the widths of the
+        * low and high periods set by the Load Count registers.
+        */
+       ctrl = DWC_TIM_CTRL_MODE_USER | DWC_TIM_CTRL_PWM;
+       dwc_pwm_writel(dwc, ctrl, DWC_TIM_CTRL(pwm->hwpwm));
+
+       /*
+        * Enable the timer. The output starts with the low period.
+        */
+       __dwc_pwm_set_enable(dwc, pwm->hwpwm, state->enabled);
+
+       return 0;
+}
+
+static int dwc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+                        const struct pwm_state *state)
+{
+       struct dwc_pwm *dwc = to_dwc_pwm(chip);
+
+       if (state->polarity != PWM_POLARITY_INVERSED)
+               return -EINVAL;
+
+       if (state->enabled) {
+               if (!pwm->state.enabled)
+                       pm_runtime_get_sync(chip->dev);
+               return __dwc_pwm_configure_timer(dwc, pwm, state);
+       } else {
+               if (pwm->state.enabled) {
+                       __dwc_pwm_set_enable(dwc, pwm->hwpwm, false);
+                       pm_runtime_put_sync(chip->dev);
+               }
+       }
+
+       return 0;
+}
+
+static void dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+                             struct pwm_state *state)
+{
+       struct dwc_pwm *dwc = to_dwc_pwm(chip);
+       u64 duty, period;
+
+       pm_runtime_get_sync(chip->dev);
+
+       state->enabled = !!(dwc_pwm_readl(dwc,
+                               DWC_TIM_CTRL(pwm->hwpwm)) & DWC_TIM_CTRL_EN);
+
+       duty = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(pwm->hwpwm));
+       duty += 1;
+       duty *= DWC_CLK_PERIOD_NS;
+       state->duty_cycle = duty;
+
+       period = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(pwm->hwpwm));
+       period += 1;
+       period *= DWC_CLK_PERIOD_NS;
+       period += duty;
+       state->period = period;
+
+       state->polarity = PWM_POLARITY_INVERSED;
+
+       pm_runtime_put_sync(chip->dev);
+}
+
+static const struct pwm_ops dwc_pwm_ops = {
+       .apply = dwc_pwm_apply,
+       .get_state = dwc_pwm_get_state,
+       .owner = THIS_MODULE,
+};
+
+static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
+{
+       struct device *dev = &pci->dev;
+       struct dwc_pwm *dwc;
+       int ret;
+
+       dwc = devm_kzalloc(&pci->dev, sizeof(*dwc), GFP_KERNEL);
+       if (!dwc)
+               return -ENOMEM;
+
+       ret = pcim_enable_device(pci);
+       if (ret) {
+               dev_err(&pci->dev,
+                       "Failed to enable device (%pe)\n", ERR_PTR(ret));
+               return ret;
+       }
+
+       pci_set_master(pci);
+
+       ret = pcim_iomap_regions(pci, BIT(0), pci_name(pci));
+       if (ret) {
+               dev_err(&pci->dev,
+                       "Failed to iomap PCI BAR (%pe)\n", ERR_PTR(ret));
+               return ret;
+       }
+
+       dwc->base = pcim_iomap_table(pci)[0];
+       if (!dwc->base) {
+               dev_err(&pci->dev, "Base address missing\n");
+               return -ENOMEM;
+       }
+
+       pci_set_drvdata(pci, dwc);
+
+       dwc->chip.dev = dev;
+       dwc->chip.ops = &dwc_pwm_ops;
+       dwc->chip.npwm = DWC_TIMERS_TOTAL;
+       dwc->chip.base = -1;
+
+       ret = pwmchip_add(&dwc->chip);
+       if (ret)
+               return ret;
+
+       pm_runtime_put(dev);
+       pm_runtime_allow(dev);
+
+       return 0;
+}
+
+static void dwc_pwm_remove(struct pci_dev *pci)
+{
+       struct dwc_pwm *dwc = pci_get_drvdata(pci);
+
+       pm_runtime_forbid(&pci->dev);
+       pm_runtime_get_noresume(&pci->dev);
+
+       pwmchip_remove(&dwc->chip);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int dwc_pwm_suspend(struct device *dev)
+{
+       struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+       struct dwc_pwm *dwc = pci_get_drvdata(pdev);
+       int i;
+
+       for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+               if (dwc->chip.pwms[i].state.enabled) {
+                       dev_err(dev, "PWM %u in use by consumer (%s)\n",
+                               i, dwc->chip.pwms[i].label);
+                       return -EBUSY;
+               }
+               dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
+               dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
+               dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
+       }
+
+       return 0;
+}
+
+static int dwc_pwm_resume(struct device *dev)
+{
+       struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
+       struct dwc_pwm *dwc = pci_get_drvdata(pdev);
+       int i;
+
+       for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+               dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
+               dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
+               dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+       }
+
+       return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(dwc_pwm_pm_ops, dwc_pwm_suspend, dwc_pwm_resume);
+
+static const struct pci_device_id dwc_pwm_id_table[] = {
+       { PCI_VDEVICE(INTEL, 0x4bb7) }, /* Elkhart Lake */
+       {  }    /* Terminating Entry */
+};
+MODULE_DEVICE_TABLE(pci, dwc_pwm_id_table);
+
+static struct pci_driver dwc_pwm_driver = {
+       .name = "pwm-dwc",
+       .probe = dwc_pwm_probe,
+       .remove = dwc_pwm_remove,
+       .id_table = dwc_pwm_id_table,
+       .driver = {
+               .pm = &dwc_pwm_pm_ops,
+       },
+};
+
+module_pci_driver(dwc_pwm_driver);
+
+MODULE_AUTHOR("Felipe Balbi (Intel)");
+MODULE_AUTHOR("Jarkko Nikula <jarkko.nikula@linux.intel.com>");
+MODULE_AUTHOR("Raymond Tan <raymond.tan@intel.com>");
+MODULE_DESCRIPTION("DesignWare PWM Controller");
+MODULE_LICENSE("GPL");
index 4bab730..c9fc6f2 100644 (file)
@@ -169,15 +169,13 @@ static const struct pwm_ops ep93xx_pwm_ops = {
 static int ep93xx_pwm_probe(struct platform_device *pdev)
 {
        struct ep93xx_pwm *ep93xx_pwm;
-       struct resource *res;
        int ret;
 
        ep93xx_pwm = devm_kzalloc(&pdev->dev, sizeof(*ep93xx_pwm), GFP_KERNEL);
        if (!ep93xx_pwm)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       ep93xx_pwm->base = devm_ioremap_resource(&pdev->dev, res);
+       ep93xx_pwm->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(ep93xx_pwm->base))
                return PTR_ERR(ep93xx_pwm->base);
 
index 59272a9..2a68012 100644 (file)
@@ -399,7 +399,6 @@ static const struct regmap_config fsl_pwm_regmap_config = {
 static int fsl_pwm_probe(struct platform_device *pdev)
 {
        struct fsl_pwm_chip *fpc;
-       struct resource *res;
        void __iomem *base;
        int ret;
 
@@ -412,8 +411,7 @@ static int fsl_pwm_probe(struct platform_device *pdev)
        fpc->soc = of_device_get_match_data(&pdev->dev);
        fpc->chip.dev = &pdev->dev;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       base = devm_ioremap_resource(&pdev->dev, res);
+       base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base))
                return PTR_ERR(base);
 
index ad205fd..a1900d0 100644 (file)
@@ -190,9 +190,7 @@ static int hibvt_pwm_probe(struct platform_device *pdev)
        const struct hibvt_pwm_soc *soc =
                                of_device_get_match_data(&pdev->dev);
        struct hibvt_pwm_chip *pwm_chip;
-       struct resource *res;
-       int ret;
-       int i;
+       int ret, i;
 
        pwm_chip = devm_kzalloc(&pdev->dev, sizeof(*pwm_chip), GFP_KERNEL);
        if (pwm_chip == NULL)
@@ -213,8 +211,7 @@ static int hibvt_pwm_probe(struct platform_device *pdev)
        pwm_chip->chip.of_pwm_n_cells = 3;
        pwm_chip->soc = soc;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pwm_chip->base = devm_ioremap_resource(&pdev->dev, res);
+       pwm_chip->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pwm_chip->base))
                return PTR_ERR(pwm_chip->base);
 
index a34d95e..6faf5b5 100644 (file)
@@ -240,7 +240,6 @@ static int img_pwm_probe(struct platform_device *pdev)
        int ret;
        u64 val;
        unsigned long clk_rate;
-       struct resource *res;
        struct img_pwm_chip *pwm;
        const struct of_device_id *of_dev_id;
 
@@ -250,8 +249,7 @@ static int img_pwm_probe(struct platform_device *pdev)
 
        pwm->dev = &pdev->dev;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pwm->base = devm_ioremap_resource(&pdev->dev, res);
+       pwm->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pwm->base))
                return PTR_ERR(pwm->base);
 
index fcdf6be..aaf629b 100644 (file)
@@ -350,13 +350,9 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev)
                return PTR_ERR(tpm->base);
 
        tpm->clk = devm_clk_get(&pdev->dev, NULL);
-       if (IS_ERR(tpm->clk)) {
-               ret = PTR_ERR(tpm->clk);
-               if (ret != -EPROBE_DEFER)
-                       dev_err(&pdev->dev,
-                               "failed to get PWM clock: %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(tpm->clk))
+               return dev_err_probe(&pdev->dev, PTR_ERR(tpm->clk),
+                                    "failed to get PWM clock\n");
 
        ret = clk_prepare_enable(tpm->clk);
        if (ret) {
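dev_err_probe() collapses the "silence -EPROBE_DEFER, log everything else, return the error" boilerplate the old code spelled out by hand. A rough sketch of its behaviour (simplified; the real helper also records the defer reason for the devices_deferred debugfs file):

static int dev_err_probe_sketch(struct device *dev, int err, const char *msg)
{
	if (err != -EPROBE_DEFER)
		dev_err(dev, "error %pe: %s", ERR_PTR(err), msg);
	else
		dev_dbg(dev, "deferred probe: %s", msg);

	return err;
}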
index f8b2c2e..727e0d3 100644 (file)
@@ -136,7 +136,6 @@ MODULE_DEVICE_TABLE(of, pwm_imx1_dt_ids);
 static int pwm_imx1_probe(struct platform_device *pdev)
 {
        struct pwm_imx1_chip *imx;
-       struct resource *r;
 
        imx = devm_kzalloc(&pdev->dev, sizeof(*imx), GFP_KERNEL);
        if (!imx)
@@ -145,31 +144,21 @@ static int pwm_imx1_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, imx);
 
        imx->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
-       if (IS_ERR(imx->clk_ipg)) {
-               dev_err(&pdev->dev, "getting ipg clock failed with %ld\n",
-                               PTR_ERR(imx->clk_ipg));
-               return PTR_ERR(imx->clk_ipg);
-       }
+       if (IS_ERR(imx->clk_ipg))
+               return dev_err_probe(&pdev->dev, PTR_ERR(imx->clk_ipg),
+                                    "getting ipg clock failed\n");
 
        imx->clk_per = devm_clk_get(&pdev->dev, "per");
-       if (IS_ERR(imx->clk_per)) {
-               int ret = PTR_ERR(imx->clk_per);
-
-               if (ret != -EPROBE_DEFER)
-                       dev_err(&pdev->dev,
-                               "failed to get peripheral clock: %d\n",
-                               ret);
-
-               return ret;
-       }
+       if (IS_ERR(imx->clk_per))
+               return dev_err_probe(&pdev->dev, PTR_ERR(imx->clk_per),
+                                    "failed to get peripheral clock\n");
 
        imx->chip.ops = &pwm_imx1_ops;
        imx->chip.dev = &pdev->dev;
        imx->chip.base = -1;
        imx->chip.npwm = 1;
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       imx->mmio_base = devm_ioremap_resource(&pdev->dev, r);
+       imx->mmio_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(imx->mmio_base))
                return PTR_ERR(imx->mmio_base);
 
index c50d453..1805532 100644 (file)
@@ -235,8 +235,9 @@ static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
        period_cycles /= prescale;
        c = clkrate * state->duty_cycle;
-       do_div(c, NSEC_PER_SEC * prescale);
+       do_div(c, NSEC_PER_SEC);
        duty_cycles = c;
+       duty_cycles /= prescale;
 
        /*
         * according to imx pwm RM, the real period value should be PERIOD
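The split division in the hunk above is not cosmetic: do_div() divides a u64 by a *32-bit* divisor, and NSEC_PER_SEC * prescale exceeds U32_MAX (about 4.29e9) for any prescale > 4, silently truncating the divisor. Dividing by NSEC_PER_SEC first and by prescale afterwards keeps each divisor in range. A hedged sketch of the corrected arithmetic (the _sketch name is illustrative):

static u64 duty_cycles_sketch(u64 clkrate, u64 duty_ns, u32 prescale)
{
	u64 c = clkrate * duty_ns;

	do_div(c, NSEC_PER_SEC);	/* divisor fits in u32 */
	do_div(c, prescale);		/* second, separate 32-bit division */
	return c;
}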
@@ -315,27 +316,14 @@ static int pwm_imx27_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, imx);
 
        imx->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
-       if (IS_ERR(imx->clk_ipg)) {
-               int ret = PTR_ERR(imx->clk_ipg);
-
-               if (ret != -EPROBE_DEFER)
-                       dev_err(&pdev->dev,
-                               "getting ipg clock failed with %d\n",
-                               ret);
-               return ret;
-       }
+       if (IS_ERR(imx->clk_ipg))
+               return dev_err_probe(&pdev->dev, PTR_ERR(imx->clk_ipg),
+                                    "getting ipg clock failed\n");
 
        imx->clk_per = devm_clk_get(&pdev->dev, "per");
-       if (IS_ERR(imx->clk_per)) {
-               int ret = PTR_ERR(imx->clk_per);
-
-               if (ret != -EPROBE_DEFER)
-                       dev_err(&pdev->dev,
-                               "failed to get peripheral clock: %d\n",
-                               ret);
-
-               return ret;
-       }
+       if (IS_ERR(imx->clk_per))
+               return dev_err_probe(&pdev->dev, PTR_ERR(imx->clk_per),
+                                    "failed to get peripheral clock\n");
 
        imx->chip.ops = &pwm_imx27_ops;
        imx->chip.dev = &pdev->dev;
diff --git a/drivers/pwm/pwm-intel-lgm.c b/drivers/pwm/pwm-intel-lgm.c
new file mode 100644 (file)
index 0000000..e9e54dd
--- /dev/null
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Intel Corporation.
+ *
+ * Limitations:
+ * - The hardware supports only a fixed period and configures only 2-wire
+ *   mode.
+ * - Only normal polarity is supported; the polarity cannot be changed.
+ * - When the PWM is disabled, its output becomes 0 (inactive). The hardware
+ *   does not keep track of the running period.
+ * - When the duty cycle is changed, the PWM output may be a mix of the
+ *   previous and the new setting for the first period; from the second
+ *   period onwards, the output follows the new setting.
+ * - This is a dedicated PWM fan controller; there are no other consumers
+ *   of this PWM controller.
+ */
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pwm.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#define LGM_PWM_FAN_CON0               0x0
+#define LGM_PWM_FAN_EN_EN              BIT(0)
+#define LGM_PWM_FAN_EN_DIS             0x0
+#define LGM_PWM_FAN_EN_MSK             BIT(0)
+#define LGM_PWM_FAN_MODE_2WIRE         0x0
+#define LGM_PWM_FAN_MODE_MSK           BIT(1)
+#define LGM_PWM_FAN_DC_MSK             GENMASK(23, 16)
+
+#define LGM_PWM_FAN_CON1               0x4
+#define LGM_PWM_FAN_MAX_RPM_MSK                GENMASK(15, 0)
+
+#define LGM_PWM_MAX_RPM                        (BIT(16) - 1)
+#define LGM_PWM_DEFAULT_RPM            4000
+#define LGM_PWM_MAX_DUTY_CYCLE         (BIT(8) - 1)
+
+#define LGM_PWM_DC_BITS                        8
+
+#define LGM_PWM_PERIOD_2WIRE_NS                (40 * NSEC_PER_MSEC)
+
+struct lgm_pwm_chip {
+       struct pwm_chip chip;
+       struct regmap *regmap;
+       u32 period;
+};
+
+static inline struct lgm_pwm_chip *to_lgm_pwm_chip(struct pwm_chip *chip)
+{
+       return container_of(chip, struct lgm_pwm_chip, chip);
+}
+
+static int lgm_pwm_enable(struct pwm_chip *chip, bool enable)
+{
+       struct lgm_pwm_chip *pc = to_lgm_pwm_chip(chip);
+       struct regmap *regmap = pc->regmap;
+
+       return regmap_update_bits(regmap, LGM_PWM_FAN_CON0, LGM_PWM_FAN_EN_MSK,
+                                 enable ? LGM_PWM_FAN_EN_EN : LGM_PWM_FAN_EN_DIS);
+}
+
+static int lgm_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+                        const struct pwm_state *state)
+{
+       struct lgm_pwm_chip *pc = to_lgm_pwm_chip(chip);
+       u32 duty_cycle, val;
+       int ret;
+
+       /* The hardware only supports normal polarity and fixed period. */
+       if (state->polarity != PWM_POLARITY_NORMAL || state->period < pc->period)
+               return -EINVAL;
+
+       if (!state->enabled)
+               return lgm_pwm_enable(chip, 0);
+
+       duty_cycle = min_t(u64, state->duty_cycle, pc->period);
+       val = duty_cycle * LGM_PWM_MAX_DUTY_CYCLE / pc->period;
+
+       ret = regmap_update_bits(pc->regmap, LGM_PWM_FAN_CON0, LGM_PWM_FAN_DC_MSK,
+                                FIELD_PREP(LGM_PWM_FAN_DC_MSK, val));
+       if (ret)
+               return ret;
+
+       return lgm_pwm_enable(chip, 1);
+}
+
+static void lgm_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+                             struct pwm_state *state)
+{
+       struct lgm_pwm_chip *pc = to_lgm_pwm_chip(chip);
+       u32 duty, val;
+
+       state->enabled = regmap_test_bits(pc->regmap, LGM_PWM_FAN_CON0,
+                                         LGM_PWM_FAN_EN_EN);
+       state->polarity = PWM_POLARITY_NORMAL;
+       state->period = pc->period; /* fixed period */
+
+       regmap_read(pc->regmap, LGM_PWM_FAN_CON0, &val);
+       duty = FIELD_GET(LGM_PWM_FAN_DC_MSK, val);
+       state->duty_cycle = DIV_ROUND_UP(duty * pc->period, LGM_PWM_MAX_DUTY_CYCLE);
+}
+
+static const struct pwm_ops lgm_pwm_ops = {
+       .get_state = lgm_pwm_get_state,
+       .apply = lgm_pwm_apply,
+       .owner = THIS_MODULE,
+};
+
+static void lgm_pwm_init(struct lgm_pwm_chip *pc)
+{
+       struct regmap *regmap = pc->regmap;
+       u32 con0_val;
+
+       con0_val = FIELD_PREP(LGM_PWM_FAN_MODE_MSK, LGM_PWM_FAN_MODE_2WIRE);
+       pc->period = LGM_PWM_PERIOD_2WIRE_NS;
+       regmap_update_bits(regmap, LGM_PWM_FAN_CON1, LGM_PWM_FAN_MAX_RPM_MSK,
+                          LGM_PWM_DEFAULT_RPM);
+       regmap_update_bits(regmap, LGM_PWM_FAN_CON0, LGM_PWM_FAN_MODE_MSK,
+                          con0_val);
+}
+
+static const struct regmap_config lgm_pwm_regmap_config = {
+       .reg_bits = 32,
+       .reg_stride = 4,
+       .val_bits = 32,
+};
+
+static void lgm_clk_release(void *data)
+{
+       struct clk *clk = data;
+
+       clk_disable_unprepare(clk);
+}
+
+static int lgm_clk_enable(struct device *dev, struct clk *clk)
+{
+       int ret;
+
+       ret = clk_prepare_enable(clk);
+       if (ret)
+               return ret;
+
+       return devm_add_action_or_reset(dev, lgm_clk_release, clk);
+}
+
+static void lgm_reset_control_release(void *data)
+{
+       struct reset_control *rst = data;
+
+       reset_control_assert(rst);
+}
+
+static int lgm_reset_control_deassert(struct device *dev, struct reset_control *rst)
+{
+       int ret;
+
+       ret = reset_control_deassert(rst);
+       if (ret)
+               return ret;
+
+       return devm_add_action_or_reset(dev, lgm_reset_control_release, rst);
+}
+
+static int lgm_pwm_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct reset_control *rst;
+       struct lgm_pwm_chip *pc;
+       void __iomem *io_base;
+       struct clk *clk;
+       int ret;
+
+       pc = devm_kzalloc(dev, sizeof(*pc), GFP_KERNEL);
+       if (!pc)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, pc);
+
+       io_base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(io_base))
+               return PTR_ERR(io_base);
+
+       pc->regmap = devm_regmap_init_mmio(dev, io_base, &lgm_pwm_regmap_config);
+       if (IS_ERR(pc->regmap))
+               return dev_err_probe(dev, PTR_ERR(pc->regmap),
+                                    "failed to init register map\n");
+
+       clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(clk))
+               return dev_err_probe(dev, PTR_ERR(clk), "failed to get clock\n");
+
+       ret = lgm_clk_enable(dev, clk);
+       if (ret)
+               return dev_err_probe(dev, ret, "failed to enable clock\n");
+
+       rst = devm_reset_control_get_exclusive(dev, NULL);
+       if (IS_ERR(rst))
+               return dev_err_probe(dev, PTR_ERR(rst),
+                                    "failed to get reset control\n");
+
+       ret = lgm_reset_control_deassert(dev, rst);
+       if (ret)
+               return dev_err_probe(dev, ret, "cannot deassert reset control\n");
+
+       pc->chip.dev = dev;
+       pc->chip.ops = &lgm_pwm_ops;
+       pc->chip.npwm = 1;
+       pc->chip.base = -1;
+
+       lgm_pwm_init(pc);
+
+       ret = pwmchip_add(&pc->chip);
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "failed to add PWM chip\n");
+
+       return 0;
+}
+
+static int lgm_pwm_remove(struct platform_device *pdev)
+{
+       struct lgm_pwm_chip *pc = platform_get_drvdata(pdev);
+
+       return pwmchip_remove(&pc->chip);
+}
+
+static const struct of_device_id lgm_pwm_of_match[] = {
+       { .compatible = "intel,lgm-pwm" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, lgm_pwm_of_match);
+
+static struct platform_driver lgm_pwm_driver = {
+       .driver = {
+               .name = "intel-pwm",
+               .of_match_table = lgm_pwm_of_match,
+       },
+       .probe = lgm_pwm_probe,
+       .remove = lgm_pwm_remove,
+};
+module_platform_driver(lgm_pwm_driver);
+
+MODULE_LICENSE("GPL v2");
index 7d33e36..5ede825 100644 (file)
@@ -50,7 +50,7 @@ static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        int ret;
 
        if (state->polarity != PWM_POLARITY_NORMAL)
-               return -ENOTSUPP;
+               return -EINVAL;
 
        if (state->period < IQS620_PWM_PERIOD_NS)
                return -EINVAL;
diff --git a/drivers/pwm/pwm-keembay.c b/drivers/pwm/pwm-keembay.c
new file mode 100644 (file)
index 0000000..cdfdef6
--- /dev/null
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Keem Bay PWM driver
+ *
+ * Copyright (C) 2020 Intel Corporation
+ * Authors: Lai Poey Seng <poey.seng.lai@intel.com>
+ *          Vineetha G. Jaya Kumaran <vineetha.g.jaya.kumaran@intel.com>
+ *
+ * Limitations:
+ * - Upon disabling a channel, the currently running period will not be
+ *   completed. However, upon reconfiguration of the duty cycle/period,
+ *   the currently running period will be completed first.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+#include <linux/regmap.h>
+
+#define KMB_TOTAL_PWM_CHANNELS         6
+#define KMB_PWM_COUNT_MAX              U16_MAX
+#define KMB_PWM_EN_BIT                 BIT(31)
+
+/* Mask */
+#define KMB_PWM_HIGH_MASK              GENMASK(31, 16)
+#define KMB_PWM_LOW_MASK               GENMASK(15, 0)
+#define KMB_PWM_LEADIN_MASK            GENMASK(30, 0)
+
+/* PWM Register offset */
+#define KMB_PWM_LEADIN_OFFSET(ch)      (0x00 + 4 * (ch))
+#define KMB_PWM_HIGHLOW_OFFSET(ch)     (0x20 + 4 * (ch))
+
+struct keembay_pwm {
+       struct pwm_chip chip;
+       struct device *dev;
+       struct clk *clk;
+       void __iomem *base;
+};
+
+static inline struct keembay_pwm *to_keembay_pwm_dev(struct pwm_chip *chip)
+{
+       return container_of(chip, struct keembay_pwm, chip);
+}
+
+static void keembay_clk_unprepare(void *data)
+{
+       clk_disable_unprepare(data);
+}
+
+static int keembay_clk_enable(struct device *dev, struct clk *clk)
+{
+       int ret;
+
+       ret = clk_prepare_enable(clk);
+       if (ret)
+               return ret;
+
+       return devm_add_action_or_reset(dev, keembay_clk_unprepare, clk);
+}
+
+/*
+ * With gcc 10 and CONFIG_CC_OPTIMIZE_FOR_SIZE, using plain "inline" instead
+ * of "__always_inline" fails to compile, because the compiler does not see
+ * that all masks passed in (e.g. KMB_PWM_LEADIN_MASK) are valid ones.
+ */
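+/*
+ * Roughly, u32_replace_bits(buff, val, mask) computes
+ * (buff & ~mask) | ((val << __ffs(mask)) & mask), i.e. a masked
+ * read-modify-write with val shifted into the mask's position.
+ */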
+static __always_inline void keembay_pwm_update_bits(struct keembay_pwm *priv, u32 mask,
+                                          u32 val, u32 offset)
+{
+       u32 buff = readl(priv->base + offset);
+
+       buff = u32_replace_bits(buff, val, mask);
+       writel(buff, priv->base + offset);
+}
+
+static void keembay_pwm_enable(struct keembay_pwm *priv, int ch)
+{
+       keembay_pwm_update_bits(priv, KMB_PWM_EN_BIT, 1,
+                               KMB_PWM_LEADIN_OFFSET(ch));
+}
+
+static void keembay_pwm_disable(struct keembay_pwm *priv, int ch)
+{
+       keembay_pwm_update_bits(priv, KMB_PWM_EN_BIT, 0,
+                               KMB_PWM_LEADIN_OFFSET(ch));
+}
+
+static void keembay_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+                                 struct pwm_state *state)
+{
+       struct keembay_pwm *priv = to_keembay_pwm_dev(chip);
+       unsigned long long high, low;
+       unsigned long clk_rate;
+       u32 highlow;
+
+       clk_rate = clk_get_rate(priv->clk);
+
+       /* Read channel enabled status */
+       highlow = readl(priv->base + KMB_PWM_LEADIN_OFFSET(pwm->hwpwm));
+       if (highlow & KMB_PWM_EN_BIT)
+               state->enabled = true;
+       else
+               state->enabled = false;
+
+       /* Read period and duty cycle */
+       highlow = readl(priv->base + KMB_PWM_HIGHLOW_OFFSET(pwm->hwpwm));
+       low = FIELD_GET(KMB_PWM_LOW_MASK, highlow) * NSEC_PER_SEC;
+       high = FIELD_GET(KMB_PWM_HIGH_MASK, highlow) * NSEC_PER_SEC;
+       state->duty_cycle = DIV_ROUND_UP_ULL(high, clk_rate);
+       state->period = DIV_ROUND_UP_ULL(high + low, clk_rate);
+       state->polarity = PWM_POLARITY_NORMAL;
+}
+
+static int keembay_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+                            const struct pwm_state *state)
+{
+       struct keembay_pwm *priv = to_keembay_pwm_dev(chip);
+       struct pwm_state current_state;
+       unsigned long long div;
+       unsigned long clk_rate;
+       u32 pwm_count = 0;
+       u16 high, low;
+
+       if (state->polarity != PWM_POLARITY_NORMAL)
+               return -EINVAL;
+
+       /*
+        * Configure the PWM repeat count (bits 15:0) as infinite and the
+        * lead-in low time (bits 30:16) as 0; both fields are expressed in
+        * clock cycles.
+        */
+       keembay_pwm_update_bits(priv, KMB_PWM_LEADIN_MASK, 0,
+                               KMB_PWM_LEADIN_OFFSET(pwm->hwpwm));
+
+       keembay_pwm_get_state(chip, pwm, &current_state);
+
+       if (!state->enabled) {
+               if (current_state.enabled)
+                       keembay_pwm_disable(priv, pwm->hwpwm);
+               return 0;
+       }
+
+       /*
+        * The upper and lower 16 bits of the KMB_PWM_HIGHLOW_OFFSET register
+        * contain the high time and low time of the waveform, respectively.
+        * All the values are in terms of clock cycles.
+        */
+
+       clk_rate = clk_get_rate(priv->clk);
+       div = clk_rate * state->duty_cycle;
+       div = DIV_ROUND_DOWN_ULL(div, NSEC_PER_SEC);
+       if (div > KMB_PWM_COUNT_MAX)
+               return -ERANGE;
+
+       high = div;
+       div = clk_rate * state->period;
+       div = DIV_ROUND_DOWN_ULL(div, NSEC_PER_SEC);
+       div = div - high;
+       if (div > KMB_PWM_COUNT_MAX)
+               return -ERANGE;
+
+       low = div;
+
+       pwm_count = FIELD_PREP(KMB_PWM_HIGH_MASK, high) |
+                   FIELD_PREP(KMB_PWM_LOW_MASK, low);
+
+       writel(pwm_count, priv->base + KMB_PWM_HIGHLOW_OFFSET(pwm->hwpwm));
+
+       if (state->enabled && !current_state.enabled)
+               keembay_pwm_enable(priv, pwm->hwpwm);
+
+       return 0;
+}
+
+static const struct pwm_ops keembay_pwm_ops = {
+       .owner = THIS_MODULE,
+       .apply = keembay_pwm_apply,
+       .get_state = keembay_pwm_get_state,
+};
+
+static int keembay_pwm_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct keembay_pwm *priv;
+       int ret;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(priv->clk))
+               return dev_err_probe(dev, PTR_ERR(priv->clk), "Failed to get clock\n");
+
+       priv->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(priv->base))
+               return PTR_ERR(priv->base);
+
+       ret = keembay_clk_enable(dev, priv->clk);
+       if (ret)
+               return ret;
+
+       priv->chip.base = -1;
+       priv->chip.dev = dev;
+       priv->chip.ops = &keembay_pwm_ops;
+       priv->chip.npwm = KMB_TOTAL_PWM_CHANNELS;
+
+       ret = pwmchip_add(&priv->chip);
+       if (ret)
+               return dev_err_probe(dev, ret, "Failed to add PWM chip\n");
+
+       platform_set_drvdata(pdev, priv);
+
+       return 0;
+}
+
+static int keembay_pwm_remove(struct platform_device *pdev)
+{
+       struct keembay_pwm *priv = platform_get_drvdata(pdev);
+
+       return pwmchip_remove(&priv->chip);
+}
+
+static const struct of_device_id keembay_pwm_of_match[] = {
+       { .compatible = "intel,keembay-pwm" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, keembay_pwm_of_match);
+
+static struct platform_driver keembay_pwm_driver = {
+       .probe  = keembay_pwm_probe,
+       .remove = keembay_pwm_remove,
+       .driver = {
+               .name = "pwm-keembay",
+               .of_match_table = keembay_pwm_of_match,
+       },
+};
+module_platform_driver(keembay_pwm_driver);
+
+MODULE_ALIAS("platform:pwm-keembay");
+MODULE_DESCRIPTION("Intel Keem Bay PWM driver");
+MODULE_LICENSE("GPL v2");
index 7551253..bf3f14f 100644 (file)
@@ -275,6 +275,7 @@ static int lp3943_pwm_probe(struct platform_device *pdev)
        lp3943_pwm->chip.dev = &pdev->dev;
        lp3943_pwm->chip.ops = &lp3943_pwm_ops;
        lp3943_pwm->chip.npwm = LP3943_NUM_PWMS;
+       lp3943_pwm->chip.base = -1;
 
        platform_set_drvdata(pdev, lp3943_pwm);
 
index 5ff1114..dc5133b 100644 (file)
@@ -325,7 +325,6 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
 {
        struct lpc18xx_pwm_chip *lpc18xx_pwm;
        struct pwm_device *pwm;
-       struct resource *res;
        int ret, i;
        u64 val;
 
@@ -336,8 +335,7 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
 
        lpc18xx_pwm->dev = &pdev->dev;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       lpc18xx_pwm->base = devm_ioremap_resource(&pdev->dev, res);
+       lpc18xx_pwm->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(lpc18xx_pwm->base))
                return PTR_ERR(lpc18xx_pwm->base);
 
index 710d9a2..6b40904 100644 (file)
@@ -98,7 +98,6 @@ static const struct pwm_ops lpc32xx_pwm_ops = {
 static int lpc32xx_pwm_probe(struct platform_device *pdev)
 {
        struct lpc32xx_pwm_chip *lpc32xx;
-       struct resource *res;
        int ret;
        u32 val;
 
@@ -106,8 +105,7 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev)
        if (!lpc32xx)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       lpc32xx->base = devm_ioremap_resource(&pdev->dev, res);
+       lpc32xx->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(lpc32xx->base))
                return PTR_ERR(lpc32xx->base);
 
index c6502cf..986786b 100644 (file)
@@ -58,7 +58,25 @@ static int pwm_lpss_probe_platform(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, lpwm);
 
-       dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE);
+       /*
+        * On Cherry Trail devices the GFX0._PS0 AML checks if the controller
+        * is on and, if it is not, turns it on and restores what it believes
+        * is the correct state of the PWM controller.
+        * Because of this we must disallow direct-complete, which keeps the
+        * controller (runtime-)suspended on resume, to avoid two issues:
+        * 1. The controller getting turned on without the linux-pm code
+        *    knowing about it. On devices where the controller is unused,
+        *    this causes it to stay on during the next suspend, causing high
+        *    battery drain (because S0i3 is not reached).
+        * 2. The state-restoring code unexpectedly messing with the
+        *    controller.
+        *
+        * Leaving the controller runtime-suspended (skipping runtime-resume +
+        * normal-suspend) during suspend is fine.
+        */
+       if (info->other_devices_aml_touches_pwm_regs)
+               dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE|
+                                                   DPM_FLAG_SMART_SUSPEND);
+
        pm_runtime_set_active(&pdev->dev);
        pm_runtime_enable(&pdev->dev);
 
@@ -73,24 +91,6 @@ static int pwm_lpss_remove_platform(struct platform_device *pdev)
        return pwm_lpss_remove(lpwm);
 }
 
-static int pwm_lpss_prepare(struct device *dev)
-{
-       struct pwm_lpss_chip *lpwm = dev_get_drvdata(dev);
-
-       /*
-        * If other device's AML code touches the PWM regs on suspend/resume
-        * force runtime-resume the PWM controller to allow this.
-        */
-       if (lpwm->info->other_devices_aml_touches_pwm_regs)
-               return 0; /* Force runtime-resume */
-
-       return 1; /* If runtime-suspended leave as is */
-}
-
-static const struct dev_pm_ops pwm_lpss_platform_pm_ops = {
-       .prepare = pwm_lpss_prepare,
-};
-
 static const struct acpi_device_id pwm_lpss_acpi_match[] = {
        { "80860F09", (unsigned long)&pwm_lpss_byt_info },
        { "80862288", (unsigned long)&pwm_lpss_bsw_info },
@@ -104,7 +104,6 @@ static struct platform_driver pwm_lpss_driver_platform = {
        .driver = {
                .name = "pwm-lpss",
                .acpi_match_table = pwm_lpss_acpi_match,
-               .pm = &pwm_lpss_platform_pm_ops,
        },
        .probe = pwm_lpss_probe_platform,
        .remove = pwm_lpss_remove_platform,
index 3444c56..939de93 100644 (file)
@@ -76,7 +76,12 @@ static int pwm_lpss_wait_for_update(struct pwm_device *pwm)
 
 static inline int pwm_lpss_is_updating(struct pwm_device *pwm)
 {
-       return (pwm_lpss_read(pwm) & PWM_SW_UPDATE) ? -EBUSY : 0;
+       if (pwm_lpss_read(pwm) & PWM_SW_UPDATE) {
+               dev_err(pwm->chip->dev, "PWM_SW_UPDATE is still set, skipping update\n");
+               return -EBUSY;
+       }
+
+       return 0;
 }
 
 static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm,
index ab001ce..fcfc3b1 100644 (file)
 #define PWM45DWIDTH_FIXUP      0x30
 #define PWMTHRES               0x30
 #define PWM45THRES_FIXUP       0x34
+#define PWM_CK_26M_SEL         0x210
 
 #define PWM_CLK_DIV_MAX                7
 
 struct pwm_mediatek_of_data {
        unsigned int num_pwms;
        bool pwm45_fixup;
+       bool has_ck_26m_sel;
 };
 
 /**
@@ -132,6 +134,10 @@ static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm,
        if (ret < 0)
                return ret;
 
+       /* Make sure we use the bus clock and not the 26MHz clock */
+       if (pc->soc->has_ck_26m_sel)
+               writel(0, pc->regs + PWM_CK_26M_SEL);
+
        /* Using a resolution in picoseconds gives higher accuracy */
        resolution = (u64)NSEC_PER_SEC * 1000;
        do_div(resolution, clk_get_rate(pc->clk_pwms[pwm->hwpwm]));
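The picosecond resolution mentioned in the comment above buys three extra decimal digits before the integer division truncates. A worked example with an assumed 26 MHz source clock (illustrative numbers only):

/*
 * Example (assumed 26 MHz clock, for illustration):
 *   in ps: 10^12 / 26000000 = 38461 ps  (i.e. 38.461 ns per tick)
 *   in ns: 10^9  / 26000000 = 38 ns     (~1.2% truncation error)
 * Cycle counts derived from the picosecond value stay correspondingly
 * closer to the requested period.
 */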
@@ -208,7 +214,6 @@ static const struct pwm_ops pwm_mediatek_ops = {
 static int pwm_mediatek_probe(struct platform_device *pdev)
 {
        struct pwm_mediatek_chip *pc;
-       struct resource *res;
        unsigned int i;
        int ret;
 
@@ -218,8 +223,7 @@ static int pwm_mediatek_probe(struct platform_device *pdev)
 
        pc->soc = of_device_get_match_data(&pdev->dev);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pc->regs = devm_ioremap_resource(&pdev->dev, res);
+       pc->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pc->regs))
                return PTR_ERR(pc->regs);
 
@@ -281,31 +285,43 @@ static int pwm_mediatek_remove(struct platform_device *pdev)
 static const struct pwm_mediatek_of_data mt2712_pwm_data = {
        .num_pwms = 8,
        .pwm45_fixup = false,
+       .has_ck_26m_sel = false,
 };
 
 static const struct pwm_mediatek_of_data mt7622_pwm_data = {
        .num_pwms = 6,
        .pwm45_fixup = false,
+       .has_ck_26m_sel = false,
 };
 
 static const struct pwm_mediatek_of_data mt7623_pwm_data = {
        .num_pwms = 5,
        .pwm45_fixup = true,
+       .has_ck_26m_sel = false,
 };
 
 static const struct pwm_mediatek_of_data mt7628_pwm_data = {
        .num_pwms = 4,
        .pwm45_fixup = true,
+       .has_ck_26m_sel = false,
 };
 
 static const struct pwm_mediatek_of_data mt7629_pwm_data = {
        .num_pwms = 1,
        .pwm45_fixup = false,
+       .has_ck_26m_sel = false,
+};
+
+static const struct pwm_mediatek_of_data mt8183_pwm_data = {
+       .num_pwms = 4,
+       .pwm45_fixup = false,
+       .has_ck_26m_sel = true,
 };
 
 static const struct pwm_mediatek_of_data mt8516_pwm_data = {
        .num_pwms = 5,
        .pwm45_fixup = false,
+       .has_ck_26m_sel = true,
 };
 
 static const struct of_device_id pwm_mediatek_of_match[] = {
@@ -314,6 +330,7 @@ static const struct of_device_id pwm_mediatek_of_match[] = {
        { .compatible = "mediatek,mt7623-pwm", .data = &mt7623_pwm_data },
        { .compatible = "mediatek,mt7628-pwm", .data = &mt7628_pwm_data },
        { .compatible = "mediatek,mt7629-pwm", .data = &mt7629_pwm_data },
+       { .compatible = "mediatek,mt8183-pwm", .data = &mt8183_pwm_data },
        { .compatible = "mediatek,mt8516-pwm", .data = &mt8516_pwm_data },
        { },
 };
index bd0d733..a3ce978 100644 (file)
@@ -537,15 +537,13 @@ static int meson_pwm_init_channels(struct meson_pwm *meson)
 static int meson_pwm_probe(struct platform_device *pdev)
 {
        struct meson_pwm *meson;
-       struct resource *regs;
        int err;
 
        meson = devm_kzalloc(&pdev->dev, sizeof(*meson), GFP_KERNEL);
        if (!meson)
                return -ENOMEM;
 
-       regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       meson->base = devm_ioremap_resource(&pdev->dev, regs);
+       meson->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(meson->base))
                return PTR_ERR(meson->base);
 
index 83b8be0..87c6b4b 100644 (file)
@@ -172,7 +172,6 @@ static const struct pwm_ops mtk_disp_pwm_ops = {
 static int mtk_disp_pwm_probe(struct platform_device *pdev)
 {
        struct mtk_disp_pwm *mdp;
-       struct resource *r;
        int ret;
 
        mdp = devm_kzalloc(&pdev->dev, sizeof(*mdp), GFP_KERNEL);
@@ -181,8 +180,7 @@ static int mtk_disp_pwm_probe(struct platform_device *pdev)
 
        mdp->data = of_device_get_match_data(&pdev->dev);
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       mdp->base = devm_ioremap_resource(&pdev->dev, r);
+       mdp->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(mdp->base))
                return PTR_ERR(mdp->base);
 
index a2a0912..d06cf60 100644 (file)
@@ -166,7 +166,6 @@ static int pwm_probe(struct platform_device *pdev)
 {
        const struct platform_device_id *id = platform_get_device_id(pdev);
        struct pxa_pwm_chip *pwm;
-       struct resource *r;
        int ret = 0;
 
        if (IS_ENABLED(CONFIG_OF) && id == NULL)
@@ -193,8 +192,7 @@ static int pwm_probe(struct platform_device *pdev)
                pwm->chip.of_pwm_n_cells = 1;
        }
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pwm->mmio_base = devm_ioremap_resource(&pdev->dev, r);
+       pwm->mmio_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pwm->mmio_base))
                return PTR_ERR(pwm->mmio_base);
 
index 7ab9eb6..002ab79 100644 (file)
@@ -168,7 +168,7 @@ static int rcar_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
        /* This HW/driver only supports normal polarity */
        if (state->polarity != PWM_POLARITY_NORMAL)
-               return -ENOTSUPP;
+               return -EINVAL;
 
        if (!state->enabled) {
                rcar_pwm_disable(rp);
@@ -204,15 +204,13 @@ static const struct pwm_ops rcar_pwm_ops = {
 static int rcar_pwm_probe(struct platform_device *pdev)
 {
        struct rcar_pwm_chip *rcar_pwm;
-       struct resource *res;
        int ret;
 
        rcar_pwm = devm_kzalloc(&pdev->dev, sizeof(*rcar_pwm), GFP_KERNEL);
        if (rcar_pwm == NULL)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       rcar_pwm->base = devm_ioremap_resource(&pdev->dev, res);
+       rcar_pwm->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(rcar_pwm->base))
                return PTR_ERR(rcar_pwm->base);
 
index 81ad5a5..d02b24b 100644 (file)
@@ -383,7 +383,6 @@ static const struct pwm_ops tpu_pwm_ops = {
 static int tpu_probe(struct platform_device *pdev)
 {
        struct tpu_device *tpu;
-       struct resource *res;
        int ret;
 
        tpu = devm_kzalloc(&pdev->dev, sizeof(*tpu), GFP_KERNEL);
@@ -394,8 +393,7 @@ static int tpu_probe(struct platform_device *pdev)
        tpu->pdev = pdev;
 
        /* Map memory, get clock and pin control. */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       tpu->base = devm_ioremap_resource(&pdev->dev, res);
+       tpu->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(tpu->base))
                return PTR_ERR(tpu->base);
 
index 77c23a2..389a5e1 100644 (file)
@@ -287,7 +287,6 @@ static int rockchip_pwm_probe(struct platform_device *pdev)
 {
        const struct of_device_id *id;
        struct rockchip_pwm_chip *pc;
-       struct resource *r;
        u32 enable_conf, ctrl;
        int ret, count;
 
@@ -299,8 +298,7 @@ static int rockchip_pwm_probe(struct platform_device *pdev)
        if (!pc)
                return -ENOMEM;
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pc->base = devm_ioremap_resource(&pdev->dev, r);
+       pc->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pc->base))
                return PTR_ERR(pc->base);
 
index 87a886f..645d006 100644 (file)
@@ -510,7 +510,6 @@ static int pwm_samsung_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct samsung_pwm_chip *chip;
-       struct resource *res;
        unsigned int chan;
        int ret;
 
@@ -541,8 +540,7 @@ static int pwm_samsung_probe(struct platform_device *pdev)
                                                        sizeof(chip->variant));
        }
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       chip->base = devm_ioremap_resource(&pdev->dev, res);
+       chip->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(chip->base))
                return PTR_ERR(chip->base);
 
index 2485fba..2a7cd2d 100644 (file)
@@ -232,7 +232,6 @@ static int pwm_sifive_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct pwm_sifive_ddata *ddata;
        struct pwm_chip *chip;
-       struct resource *res;
        int ret;
 
        ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL);
@@ -248,8 +247,7 @@ static int pwm_sifive_probe(struct platform_device *pdev)
        chip->base = -1;
        chip->npwm = 4;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       ddata->regs = devm_ioremap_resource(dev, res);
+       ddata->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(ddata->regs))
                return PTR_ERR(ddata->regs);
 
index b4c651f..0b01ec2 100644 (file)
@@ -232,6 +232,8 @@ static int sl28cpld_pwm_probe(struct platform_device *pdev)
        chip->base = -1;
        chip->npwm = 1;
 
+       platform_set_drvdata(pdev, priv);
+
        ret = pwmchip_add(&priv->pwm_chip);
        if (ret) {
                dev_err(&pdev->dev, "failed to add PWM chip (%pe)",
@@ -239,8 +241,6 @@ static int sl28cpld_pwm_probe(struct platform_device *pdev)
                return ret;
        }
 
-       platform_set_drvdata(pdev, priv);
-
        return 0;
 }
 
index 6c6b44f..f63b54a 100644 (file)
@@ -174,7 +174,6 @@ static int spear_pwm_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct spear_pwm_chip *pc;
-       struct resource *r;
        int ret;
        u32 val;
 
@@ -182,8 +181,7 @@ static int spear_pwm_probe(struct platform_device *pdev)
        if (!pc)
                return -ENOMEM;
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pc->mmio_base = devm_ioremap_resource(&pdev->dev, r);
+       pc->mmio_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pc->mmio_base))
                return PTR_ERR(pc->mmio_base);
 
index 1508616..99c70e0 100644 (file)
@@ -505,7 +505,6 @@ static int sti_pwm_probe_dt(struct sti_pwm_chip *pc)
        if (IS_ERR(pc->prescale_high))
                return PTR_ERR(pc->prescale_high);
 
-
        pc->pwm_out_en = devm_regmap_field_alloc(dev, pc->regmap,
                                                 reg_fields[PWM_OUT_EN]);
        if (IS_ERR(pc->pwm_out_en))
@@ -540,7 +539,6 @@ static int sti_pwm_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct sti_pwm_compat_data *cdata;
        struct sti_pwm_chip *pc;
-       struct resource *res;
        unsigned int i;
        int irq, ret;
 
@@ -552,9 +550,7 @@ static int sti_pwm_probe(struct platform_device *pdev)
        if (!cdata)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-       pc->mmio = devm_ioremap_resource(dev, res);
+       pc->mmio = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pc->mmio))
                return PTR_ERR(pc->mmio);
 
@@ -593,38 +589,34 @@ static int sti_pwm_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       if (!cdata->pwm_num_devs)
-               goto skip_pwm;
-
-       pc->pwm_clk = of_clk_get_by_name(dev->of_node, "pwm");
-       if (IS_ERR(pc->pwm_clk)) {
-               dev_err(dev, "failed to get PWM clock\n");
-               return PTR_ERR(pc->pwm_clk);
-       }
+       if (cdata->pwm_num_devs) {
+               pc->pwm_clk = of_clk_get_by_name(dev->of_node, "pwm");
+               if (IS_ERR(pc->pwm_clk)) {
+                       dev_err(dev, "failed to get PWM clock\n");
+                       return PTR_ERR(pc->pwm_clk);
+               }
 
-       ret = clk_prepare(pc->pwm_clk);
-       if (ret) {
-               dev_err(dev, "failed to prepare clock\n");
-               return ret;
+               ret = clk_prepare(pc->pwm_clk);
+               if (ret) {
+                       dev_err(dev, "failed to prepare clock\n");
+                       return ret;
+               }
        }
 
-skip_pwm:
-       if (!cdata->cpt_num_devs)
-               goto skip_cpt;
-
-       pc->cpt_clk = of_clk_get_by_name(dev->of_node, "capture");
-       if (IS_ERR(pc->cpt_clk)) {
-               dev_err(dev, "failed to get PWM capture clock\n");
-               return PTR_ERR(pc->cpt_clk);
-       }
+       if (cdata->cpt_num_devs) {
+               pc->cpt_clk = of_clk_get_by_name(dev->of_node, "capture");
+               if (IS_ERR(pc->cpt_clk)) {
+                       dev_err(dev, "failed to get PWM capture clock\n");
+                       return PTR_ERR(pc->cpt_clk);
+               }
 
-       ret = clk_prepare(pc->cpt_clk);
-       if (ret) {
-               dev_err(dev, "failed to prepare clock\n");
-               return ret;
+               ret = clk_prepare(pc->cpt_clk);
+               if (ret) {
+                       dev_err(dev, "failed to prepare clock\n");
+                       return ret;
+               }
        }
 
-skip_cpt:
        pc->chip.dev = dev;
        pc->chip.ops = &sti_pwm_ops;
        pc->chip.base = -1;
index 38a4c5c..ce5c4fc 100644 (file)
@@ -294,12 +294,8 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
        ctrl |= BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
 
-       if (state->enabled) {
+       if (state->enabled)
                ctrl |= BIT_CH(PWM_EN, pwm->hwpwm);
-       } else {
-               ctrl &= ~BIT_CH(PWM_EN, pwm->hwpwm);
-               ctrl &= ~BIT_CH(PWM_CLK_GATING, pwm->hwpwm);
-       }
 
        sun4i_pwm_writel(sun4i_pwm, ctrl, PWM_CTRL_REG);
 
@@ -395,7 +391,6 @@ MODULE_DEVICE_TABLE(of, sun4i_pwm_dt_ids);
 static int sun4i_pwm_probe(struct platform_device *pdev)
 {
        struct sun4i_pwm_chip *pwm;
-       struct resource *res;
        int ret;
 
        pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL);
@@ -406,8 +401,7 @@ static int sun4i_pwm_probe(struct platform_device *pdev)
        if (!pwm->data)
                return -ENODEV;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pwm->base = devm_ioremap_resource(&pdev->dev, res);
+       pwm->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pwm->base))
                return PTR_ERR(pwm->base);
 
index 1daf591..55bc63d 100644 (file)
@@ -237,7 +237,6 @@ static const struct pwm_ops tegra_pwm_ops = {
 static int tegra_pwm_probe(struct platform_device *pdev)
 {
        struct tegra_pwm_chip *pwm;
-       struct resource *r;
        int ret;
 
        pwm = devm_kzalloc(&pdev->dev, sizeof(*pwm), GFP_KERNEL);
@@ -247,8 +246,7 @@ static int tegra_pwm_probe(struct platform_device *pdev)
        pwm->soc = of_device_get_match_data(&pdev->dev);
        pwm->dev = &pdev->dev;
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pwm->regs = devm_ioremap_resource(&pdev->dev, r);
+       pwm->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pwm->regs))
                return PTR_ERR(pwm->regs);
 
index 683804c..2a89490 100644 (file)
@@ -196,7 +196,6 @@ static int ecap_pwm_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct ecap_pwm_chip *pc;
-       struct resource *r;
        struct clk *clk;
        int ret;
 
@@ -230,8 +229,7 @@ static int ecap_pwm_probe(struct platform_device *pdev)
        pc->chip.base = -1;
        pc->chip.npwm = 1;
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pc->mmio_base = devm_ioremap_resource(&pdev->dev, r);
+       pc->mmio_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pc->mmio_base))
                return PTR_ERR(pc->mmio_base);
 
index 0846917..a7fb224 100644 (file)
@@ -421,7 +421,6 @@ static int ehrpwm_pwm_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct ehrpwm_pwm_chip *pc;
-       struct resource *r;
        struct clk *clk;
        int ret;
 
@@ -437,10 +436,8 @@ static int ehrpwm_pwm_probe(struct platform_device *pdev)
                }
        }
 
-       if (IS_ERR(clk)) {
-               dev_err(&pdev->dev, "failed to get clock\n");
-               return PTR_ERR(clk);
-       }
+       if (IS_ERR(clk))
+               return dev_err_probe(&pdev->dev, PTR_ERR(clk), "Failed to get fck\n");
 
        pc->clk_rate = clk_get_rate(clk);
        if (!pc->clk_rate) {
@@ -455,17 +452,14 @@ static int ehrpwm_pwm_probe(struct platform_device *pdev)
        pc->chip.base = -1;
        pc->chip.npwm = NUM_PWM_CHANNEL;
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pc->mmio_base = devm_ioremap_resource(&pdev->dev, r);
+       pc->mmio_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pc->mmio_base))
                return PTR_ERR(pc->mmio_base);
 
        /* Acquire tbclk for Time Base EHRPWM submodule */
        pc->tbclk = devm_clk_get(&pdev->dev, "tbclk");
-       if (IS_ERR(pc->tbclk)) {
-               dev_err(&pdev->dev, "Failed to get tbclk\n");
-               return PTR_ERR(pc->tbclk);
-       }
+       if (IS_ERR(pc->tbclk))
+               return dev_err_probe(&pdev->dev, PTR_ERR(pc->tbclk), "Failed to get tbclk\n");
 
        ret = clk_prepare(pc->tbclk);
        if (ret < 0) {
index 11d45e5..6e36851 100644 (file)
@@ -193,7 +193,6 @@ MODULE_DEVICE_TABLE(of, vt8500_pwm_dt_ids);
 static int vt8500_pwm_probe(struct platform_device *pdev)
 {
        struct vt8500_chip *chip;
-       struct resource *r;
        struct device_node *np = pdev->dev.of_node;
        int ret;
 
@@ -219,8 +218,7 @@ static int vt8500_pwm_probe(struct platform_device *pdev)
                return PTR_ERR(chip->clk);
        }
 
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       chip->base = devm_ioremap_resource(&pdev->dev, r);
+       chip->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(chip->base))
                return PTR_ERR(chip->base);
 
index e2c21cc..34e9119 100644 (file)
@@ -196,7 +196,6 @@ static const struct pwm_ops zx_pwm_ops = {
 static int zx_pwm_probe(struct platform_device *pdev)
 {
        struct zx_pwm_chip *zpc;
-       struct resource *res;
        unsigned int i;
        int ret;
 
@@ -204,8 +203,7 @@ static int zx_pwm_probe(struct platform_device *pdev)
        if (!zpc)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       zpc->base = devm_ioremap_resource(&pdev->dev, res);
+       zpc->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(zpc->base))
                return PTR_ERR(zpc->base);
 
@@ -238,6 +236,7 @@ static int zx_pwm_probe(struct platform_device *pdev)
        ret = pwmchip_add(&zpc->chip);
        if (ret < 0) {
                dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
+               clk_disable_unprepare(zpc->pclk);
                return ret;
        }
 
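The one-line fix above plugs a clock leak: assuming pclk was prepared and enabled earlier in probe, as the fix implies, it was previously left enabled when pwmchip_add() failed. Sketched, the general rule is that every probe error path must unwind exactly the resources acquired before it (or hand them to devres, as the other drivers in this series do):

	ret = clk_prepare_enable(zpc->pclk);
	if (ret)
		return ret;

	ret = pwmchip_add(&zpc->chip);
	if (ret < 0) {
		clk_disable_unprepare(zpc->pclk);	/* undo before bailing out */
		return ret;
	}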
index 65ad9d0..6123f9f 100644 (file)
@@ -13,7 +13,7 @@ config RTC_MC146818_LIB
 menuconfig RTC_CLASS
        bool "Real Time Clock"
        default n
-       depends on !S390 && !UML
+       depends on !S390
        select RTC_LIB
        help
          Generic RTC class support. If you say yes here, you will
@@ -817,15 +817,6 @@ config RTC_DRV_RX4581
          This driver can also be built as a module. If so the module
          will be called rtc-rx4581.
 
-config RTC_DRV_RX6110
-       tristate "Epson RX-6110"
-       select REGMAP_SPI
-       help
-         If you say yes here you will get support for the Epson RX-6610.
-
-         This driver can also be built as a module. If so the module
-         will be called rtc-rx6110.
-
 config RTC_DRV_RS5C348
        tristate "Ricoh RS5C348A/B"
        help
@@ -936,6 +927,17 @@ config RTC_DRV_RV3029_HWMON
          Say Y here if you want to expose temperature sensor data on
          rtc-rv3029.
 
+config RTC_DRV_RX6110
+       tristate "Epson RX-6110"
+       depends on RTC_I2C_AND_SPI
+       select REGMAP_SPI if SPI_MASTER
+       select REGMAP_I2C if I2C
+       help
+         If you say yes here you will get support for the Epson RX-6110.
+
+         This driver can also be built as a module. If so the module
+         will be called rtc-rx6110.
+
 comment "Platform RTC drivers"
 
 # this 'CMOS' RTC driver is arch dependent because it requires
@@ -1017,6 +1019,7 @@ config RTC_DRV_DS1553
 
 config RTC_DRV_DS1685_FAMILY
        tristate "Dallas/Maxim DS1685 Family"
+       depends on HAS_IOMEM
        help
          If you say yes here you get support for the Dallas/Maxim DS1685
          family of real time chips.  This family includes the DS1685/DS1687,
@@ -1150,6 +1153,7 @@ config RTC_DRV_STK17TA8
 
 config RTC_DRV_M48T86
        tristate "ST M48T86/Dallas DS12887"
+       depends on HAS_IOMEM
        help
          If you say Y here you will get support for the
          ST M48T86 and Dallas DS12887 RTC chips.
@@ -1752,7 +1756,9 @@ config RTC_DRV_LOONGSON1
 
 config RTC_DRV_MXC
        tristate "Freescale MXC Real Time Clock"
-       depends on ARCH_MXC
+       depends on ARCH_MXC || COMPILE_TEST
+       depends on HAS_IOMEM
+       depends on OF
        help
           If you say yes here you get support for the Freescale MXC
           RTC module.
@@ -1762,7 +1768,9 @@ config RTC_DRV_MXC
 
 config RTC_DRV_MXC_V2
        tristate "Freescale MXC Real Time Clock for i.MX53"
-       depends on ARCH_MXC
+       depends on ARCH_MXC || COMPILE_TEST
+       depends on HAS_IOMEM
+       depends on OF
        help
           If you say yes here you get support for the Freescale MXC
           SRTC module in i.MX53 processor.
@@ -1935,7 +1943,6 @@ config RTC_DRV_HID_SENSOR_TIME
 config RTC_DRV_GOLDFISH
        tristate "Goldfish Real Time Clock"
        depends on OF && HAS_IOMEM
-       depends on GOLDFISH || COMPILE_TEST
        help
          Say yes to enable RTC driver for the Goldfish based virtual platform.
 
index 5855aa2..7e470fb 100644 (file)
@@ -28,6 +28,7 @@ static void rtc_device_release(struct device *dev)
        struct rtc_device *rtc = to_rtc_device(dev);
 
        ida_simple_remove(&rtc_ida, rtc->id);
+       mutex_destroy(&rtc->ops_lock);
        kfree(rtc);
 }
 
@@ -326,8 +327,10 @@ static void rtc_device_get_offset(struct rtc_device *rtc)
  *
  * @rtc: the RTC class device to destroy
  */
-static void rtc_device_unregister(struct rtc_device *rtc)
+static void devm_rtc_unregister_device(void *data)
 {
+       struct rtc_device *rtc = data;
+
        mutex_lock(&rtc->ops_lock);
        /*
         * Remove innards of this RTC, then disable it, before
@@ -337,60 +340,43 @@ static void rtc_device_unregister(struct rtc_device *rtc)
        cdev_device_del(&rtc->char_dev, &rtc->dev);
        rtc->ops = NULL;
        mutex_unlock(&rtc->ops_lock);
-       put_device(&rtc->dev);
 }
 
-static void devm_rtc_release_device(struct device *dev, void *res)
+static void devm_rtc_release_device(void *res)
 {
-       struct rtc_device *rtc = *(struct rtc_device **)res;
+       struct rtc_device *rtc = res;
 
-       rtc_nvmem_unregister(rtc);
-
-       if (rtc->registered)
-               rtc_device_unregister(rtc);
-       else
-               put_device(&rtc->dev);
+       put_device(&rtc->dev);
 }
 
 struct rtc_device *devm_rtc_allocate_device(struct device *dev)
 {
-       struct rtc_device **ptr, *rtc;
+       struct rtc_device *rtc;
        int id, err;
 
        id = rtc_device_get_id(dev);
        if (id < 0)
                return ERR_PTR(id);
 
-       ptr = devres_alloc(devm_rtc_release_device, sizeof(*ptr), GFP_KERNEL);
-       if (!ptr) {
-               err = -ENOMEM;
-               goto exit_ida;
-       }
-
        rtc = rtc_allocate_device();
        if (!rtc) {
-               err = -ENOMEM;
-               goto exit_devres;
+               ida_simple_remove(&rtc_ida, id);
+               return ERR_PTR(-ENOMEM);
        }
 
-       *ptr = rtc;
-       devres_add(dev, ptr);
-
        rtc->id = id;
        rtc->dev.parent = dev;
        dev_set_name(&rtc->dev, "rtc%d", id);
 
-       return rtc;
+       err = devm_add_action_or_reset(dev, devm_rtc_release_device, rtc);
+       if (err)
+               return ERR_PTR(err);
 
-exit_devres:
-       devres_free(ptr);
-exit_ida:
-       ida_simple_remove(&rtc_ida, id);
-       return ERR_PTR(err);
+       return rtc;
 }
 EXPORT_SYMBOL_GPL(devm_rtc_allocate_device);
 
-int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
+int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc)
 {
        struct rtc_wkalrm alrm;
        int err;
@@ -420,7 +406,6 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
 
        rtc_proc_add_device(rtc);
 
-       rtc->registered = true;
        dev_info(rtc->dev.parent, "registered as %s\n",
                 dev_name(&rtc->dev));
 
@@ -429,9 +414,10 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
                rtc_hctosys(rtc);
 #endif
 
-       return 0;
+       return devm_add_action_or_reset(rtc->dev.parent,
+                                       devm_rtc_unregister_device, rtc);
 }
-EXPORT_SYMBOL_GPL(__rtc_register_device);
+EXPORT_SYMBOL_GPL(__devm_rtc_register_device);
 
 /**
  * devm_rtc_device_register - resource managed rtc_device_register()
@@ -461,7 +447,7 @@ struct rtc_device *devm_rtc_device_register(struct device *dev,
 
        rtc->ops = ops;
 
-       err = __rtc_register_device(owner, rtc);
+       err = __devm_rtc_register_device(owner, rtc);
        if (err)
                return ERR_PTR(err);
 
index 4312096..07ede21 100644 (file)
@@ -9,99 +9,22 @@
 #include <linux/types.h>
 #include <linux/nvmem-consumer.h>
 #include <linux/rtc.h>
-#include <linux/slab.h>
-#include <linux/sysfs.h>
 
-/*
- * Deprecated ABI compatibility, this should be removed at some point
- */
-
-static const char nvram_warning[] = "Deprecated ABI, please use nvmem";
-
-static ssize_t
-rtc_nvram_read(struct file *filp, struct kobject *kobj,
-              struct bin_attribute *attr,
-              char *buf, loff_t off, size_t count)
-{
-       dev_warn_once(kobj_to_dev(kobj), nvram_warning);
-
-       return nvmem_device_read(attr->private, off, count, buf);
-}
-
-static ssize_t
-rtc_nvram_write(struct file *filp, struct kobject *kobj,
-               struct bin_attribute *attr,
-               char *buf, loff_t off, size_t count)
-{
-       dev_warn_once(kobj_to_dev(kobj), nvram_warning);
-
-       return nvmem_device_write(attr->private, off, count, buf);
-}
-
-static int rtc_nvram_register(struct rtc_device *rtc,
-                             struct nvmem_device *nvmem, size_t size)
-{
-       int err;
-
-       rtc->nvram = kzalloc(sizeof(*rtc->nvram), GFP_KERNEL);
-       if (!rtc->nvram)
-               return -ENOMEM;
-
-       rtc->nvram->attr.name = "nvram";
-       rtc->nvram->attr.mode = 0644;
-       rtc->nvram->private = nvmem;
-
-       sysfs_bin_attr_init(rtc->nvram);
-
-       rtc->nvram->read = rtc_nvram_read;
-       rtc->nvram->write = rtc_nvram_write;
-       rtc->nvram->size = size;
-
-       err = sysfs_create_bin_file(&rtc->dev.parent->kobj,
-                                   rtc->nvram);
-       if (err) {
-               kfree(rtc->nvram);
-               rtc->nvram = NULL;
-       }
-
-       return err;
-}
-
-static void rtc_nvram_unregister(struct rtc_device *rtc)
-{
-       sysfs_remove_bin_file(&rtc->dev.parent->kobj, rtc->nvram);
-       kfree(rtc->nvram);
-       rtc->nvram = NULL;
-}
-
-/*
- * New ABI, uses nvmem
- */
-int rtc_nvmem_register(struct rtc_device *rtc,
+int devm_rtc_nvmem_register(struct rtc_device *rtc,
                       struct nvmem_config *nvmem_config)
 {
+       struct device *dev = rtc->dev.parent;
        struct nvmem_device *nvmem;
 
        if (!nvmem_config)
                return -ENODEV;
 
-       nvmem_config->dev = rtc->dev.parent;
+       nvmem_config->dev = dev;
        nvmem_config->owner = rtc->owner;
-       nvmem = devm_nvmem_register(rtc->dev.parent, nvmem_config);
+       nvmem = devm_nvmem_register(dev, nvmem_config);
        if (IS_ERR(nvmem))
-               return PTR_ERR(nvmem);
-
-       /* Register the old ABI */
-       if (rtc->nvram_old_abi)
-               rtc_nvram_register(rtc, nvmem, nvmem_config->size);
+               dev_err(dev, "failed to register nvmem device for RTC\n");
 
-       return 0;
-}
-EXPORT_SYMBOL_GPL(rtc_nvmem_register);
-
-void rtc_nvmem_unregister(struct rtc_device *rtc)
-{
-       /* unregister the old ABI */
-       if (rtc->nvram)
-               rtc_nvram_unregister(rtc);
+       return PTR_ERR_OR_ZERO(nvmem);
 }
+EXPORT_SYMBOL_GPL(devm_rtc_nvmem_register);
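The drivers converted below all settle on the same shape: allocate with devm_rtc_allocate_device(), fill in ops and range, register with devm_rtc_register_device(), and only then attach nvmem with devm_rtc_nvmem_register(), whose failure is now logged by the core and can safely be ignored. A condensed, hypothetical probe illustrating that pattern (the foo_* names are illustrative only):

static int foo_rtc_probe(struct platform_device *pdev)
{
	struct nvmem_config nvmem_cfg = {
		.name = "foo_nvram",		/* illustrative */
		.size = 64,
	};
	struct rtc_device *rtc;
	int ret;

	rtc = devm_rtc_allocate_device(&pdev->dev);
	if (IS_ERR(rtc))
		return PTR_ERR(rtc);

	rtc->ops = &foo_rtc_ops;		/* hypothetical ops table */
	rtc->range_max = U32_MAX;

	ret = devm_rtc_register_device(rtc);
	if (ret)
		return ret;

	/* errors are reported by the core; the RTC registration stands */
	devm_rtc_nvmem_register(rtc, &nvmem_cfg);

	return 0;
}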
index 75779e8..6a3f44c 100644 (file)
@@ -294,7 +294,7 @@ static int pm80x_rtc_probe(struct platform_device *pdev)
        info->rtc_dev->ops = &pm80x_rtc_ops;
        info->rtc_dev->range_max = U32_MAX;
 
-       ret = rtc_register_device(info->rtc_dev);
+       ret = devm_rtc_register_device(info->rtc_dev);
        if (ret)
                goto out_rtc;
 
index c90457d..2c809a1 100644 (file)
@@ -307,7 +307,7 @@ static int pm860x_rtc_probe(struct platform_device *pdev)
        info->rtc_dev->ops = &pm860x_rtc_ops;
        info->rtc_dev->range_max = U32_MAX;
 
-       ret = rtc_register_device(info->rtc_dev);
+       ret = devm_rtc_register_device(info->rtc_dev);
        if (ret)
                return ret;
 
index 2370ac0..6e3e320 100644 (file)
@@ -892,7 +892,7 @@ static int abb5zes3_probe(struct i2c_client *client,
                }
        }
 
-       ret = rtc_register_device(data->rtc);
+       ret = devm_rtc_register_device(data->rtc);
 
 err:
        if (ret && data->irq)
index d690985..b20d8f2 100644 (file)
@@ -420,7 +420,7 @@ static int abeoz9_probe(struct i2c_client *client,
        data->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        data->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
-       ret = rtc_register_device(data->rtc);
+       ret = devm_rtc_register_device(data->rtc);
        if (ret)
                return ret;
 
index 2ed6def..e4fd961 100644 (file)
@@ -238,7 +238,7 @@ static int __init ab3100_rtc_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, rtc);
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static struct platform_driver ab3100_rtc_driver = {
index 3d60f32..b400488 100644 (file)
@@ -404,7 +404,7 @@ static int ab8500_rtc_probe(struct platform_device *pdev)
        if (err)
                return err;
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static int ab8500_rtc_remove(struct platform_device *pdev)
index 803725b..6733bb0 100644 (file)
@@ -851,7 +851,7 @@ static int abx80x_probe(struct i2c_client *client,
                return err;
        }
 
-       return rtc_register_device(priv->rtc);
+       return devm_rtc_register_device(priv->rtc);
 }
 
 static const struct i2c_device_id abx80x_id[] = {
index 2922393..1ddbef9 100644 (file)
@@ -610,7 +610,7 @@ static int ac100_rtc_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       return rtc_register_device(chip->rtc);
+       return devm_rtc_register_device(chip->rtc);
 }
 
 static int ac100_rtc_remove(struct platform_device *pdev)
index 94d7c22..807a79c 100644 (file)
@@ -556,7 +556,7 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev)
 
        rtc->rtc_dev->range_max = U32_MAX;
 
-       return rtc_register_device(rtc->rtc_dev);
+       return devm_rtc_register_device(rtc->rtc_dev);
 }
 
 #ifdef CONFIG_PM_SLEEP
index eacdd06..a93352e 100644 (file)
@@ -104,7 +104,7 @@ static int aspeed_rtc_probe(struct platform_device *pdev)
        rtc->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_1900;
        rtc->rtc_dev->range_max = 38814989399LL; /* 3199-12-31 23:59:59 */
 
-       return rtc_register_device(rtc->rtc_dev);
+       return devm_rtc_register_device(rtc->rtc_dev);
 }
 
 static const struct of_device_id aspeed_rtc_match[] = {
index 5e811e0..fe396d2 100644 (file)
 #define                AT91_RTC_UPDCAL         BIT(1)          /* Update Request Calendar Register */
 
 #define        AT91_RTC_MR             0x04                    /* Mode Register */
+#define                AT91_RTC_HRMOD          BIT(0)          /* 12/24 hour mode */
+#define                AT91_RTC_NEGPPM         BIT(4)          /* Negative PPM correction */
+#define                AT91_RTC_CORRECTION     GENMASK(14, 8)  /* Slow clock correction */
+#define                AT91_RTC_HIGHPPM        BIT(15)         /* High PPM correction */
 
 #define        AT91_RTC_TIMR           0x08                    /* Time Register */
 #define                AT91_RTC_SEC            GENMASK(6, 0)   /* Current Second */
@@ -77,6 +81,9 @@
 #define                AT91_RTC_NVTIMALR       BIT(2)          /* Non valid Time Alarm */
 #define                AT91_RTC_NVCALALR       BIT(3)          /* Non valid Calendar Alarm */
 
+#define AT91_RTC_CORR_DIVIDEND         3906000
+#define AT91_RTC_CORR_LOW_RATIO                20
+
 #define at91_rtc_read(field) \
        readl_relaxed(at91_rtc_regs + field)
 #define at91_rtc_write(field, val) \
@@ -84,6 +91,7 @@
 
 struct at91_rtc_config {
        bool use_shadow_imr;
+       bool has_correction;
 };
 
 static const struct at91_rtc_config *at91_rtc_config;
@@ -293,6 +301,75 @@ static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
        return 0;
 }
 
+static int at91_rtc_readoffset(struct device *dev, long *offset)
+{
+       u32 mr = at91_rtc_read(AT91_RTC_MR);
+       long val = FIELD_GET(AT91_RTC_CORRECTION, mr);
+
+       if (!val) {
+               *offset = 0;
+               return 0;
+       }
+
+       val++;
+
+       if (!(mr & AT91_RTC_NEGPPM))
+               val = -val;
+
+       if (!(mr & AT91_RTC_HIGHPPM))
+               val *= AT91_RTC_CORR_LOW_RATIO;
+
+       *offset = DIV_ROUND_CLOSEST(AT91_RTC_CORR_DIVIDEND, val);
+
+       return 0;
+}
+
+static int at91_rtc_setoffset(struct device *dev, long offset)
+{
+       long corr;
+       u32 mr;
+
+       if (offset > AT91_RTC_CORR_DIVIDEND / 2)
+               return -ERANGE;
+       if (offset < -AT91_RTC_CORR_DIVIDEND / 2)
+               return -ERANGE;
+
+       mr = at91_rtc_read(AT91_RTC_MR);
+       mr &= ~(AT91_RTC_NEGPPM | AT91_RTC_CORRECTION | AT91_RTC_HIGHPPM);
+
+       if (offset > 0)
+               mr |= AT91_RTC_NEGPPM;
+       else
+               offset = -offset;
+
+       /* offset is less than 764 ppb, disable correction */
+       if (offset < 764) {
+               at91_rtc_write(AT91_RTC_MR, mr & ~AT91_RTC_NEGPPM);
+
+               return 0;
+       }
+
+       /*
+        * 29208 ppb is the exact cutoff between the low and high ranges;
+        * past it, low-range values are never more accurate than the
+        * high-range ones.
+        */
+       if (offset < 29208) {
+               corr = DIV_ROUND_CLOSEST(AT91_RTC_CORR_DIVIDEND, offset * AT91_RTC_CORR_LOW_RATIO);
+       } else {
+               corr = DIV_ROUND_CLOSEST(AT91_RTC_CORR_DIVIDEND, offset);
+               mr |= AT91_RTC_HIGHPPM;
+       }
+
+       if (corr > 128)
+               corr = 128;
+
+       mr |= FIELD_PREP(AT91_RTC_CORRECTION, corr - 1);
+
+       at91_rtc_write(AT91_RTC_MR, mr);
+
+       return 0;
+}
+
 /*
  * IRQ handler for the RTC
  */
@@ -343,6 +420,10 @@ static const struct at91_rtc_config at91sam9x5_config = {
        .use_shadow_imr = true,
 };
 
+static const struct at91_rtc_config sama5d4_config = {
+       .has_correction = true,
+};
+
 static const struct of_device_id at91_rtc_dt_ids[] = {
        {
                .compatible = "atmel,at91rm9200-rtc",
@@ -352,10 +433,13 @@ static const struct of_device_id at91_rtc_dt_ids[] = {
                .data = &at91sam9x5_config,
        }, {
                .compatible = "atmel,sama5d4-rtc",
-               .data = &at91rm9200_config,
+               .data = &sama5d4_config,
        }, {
                .compatible = "atmel,sama5d2-rtc",
-               .data = &at91rm9200_config,
+               .data = &sama5d4_config,
+       }, {
+               .compatible = "microchip,sam9x60-rtc",
+               .data = &sama5d4_config,
        }, {
                /* sentinel */
        }
@@ -370,6 +454,16 @@ static const struct rtc_class_ops at91_rtc_ops = {
        .alarm_irq_enable = at91_rtc_alarm_irq_enable,
 };
 
+static const struct rtc_class_ops sama5d4_rtc_ops = {
+       .read_time      = at91_rtc_readtime,
+       .set_time       = at91_rtc_settime,
+       .read_alarm     = at91_rtc_readalarm,
+       .set_alarm      = at91_rtc_setalarm,
+       .alarm_irq_enable = at91_rtc_alarm_irq_enable,
+       .set_offset     = at91_rtc_setoffset,
+       .read_offset    = at91_rtc_readoffset,
+};
+
 /*
  * Initialize and install RTC driver
  */
@@ -416,7 +510,7 @@ static int __init at91_rtc_probe(struct platform_device *pdev)
        }
 
        at91_rtc_write(AT91_RTC_CR, 0);
-       at91_rtc_write(AT91_RTC_MR, 0);         /* 24 hour mode */
+       at91_rtc_write(AT91_RTC_MR, at91_rtc_read(AT91_RTC_MR) & ~AT91_RTC_HRMOD);
 
        /* Disable all interrupts */
        at91_rtc_write_idr(AT91_RTC_ACKUPD | AT91_RTC_ALARM |
@@ -437,10 +531,14 @@ static int __init at91_rtc_probe(struct platform_device *pdev)
        if (!device_can_wakeup(&pdev->dev))
                device_init_wakeup(&pdev->dev, 1);
 
-       rtc->ops = &at91_rtc_ops;
+       if (at91_rtc_config->has_correction)
+               rtc->ops = &sama5d4_rtc_ops;
+       else
+               rtc->ops = &at91_rtc_ops;
+
        rtc->range_min = RTC_TIMESTAMP_BEGIN_1900;
        rtc->range_max = RTC_TIMESTAMP_END_2099;
-       ret = rtc_register_device(rtc);
+       ret = devm_rtc_register_device(rtc);
        if (ret)
                goto err_clk;
 
index e39e898..2216be4 100644 (file)
@@ -431,7 +431,7 @@ static int at91_rtc_probe(struct platform_device *pdev)
                dev_warn(&pdev->dev, "%s: SET TIME!\n",
                         dev_name(&rtc->rtcdev->dev));
 
-       return rtc_register_device(rtc->rtcdev);
+       return devm_rtc_register_device(rtc->rtcdev);
 
 err_clk:
        clk_disable_unprepare(rtc->sclk);
index 791bebc..e6428b2 100644 (file)
@@ -104,7 +104,7 @@ static int au1xtoy_rtc_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, rtcdev);
 
-       return rtc_register_device(rtcdev);
+       return devm_rtc_register_device(rtcdev);
 }
 
 static struct platform_driver au1xrtc_driver = {
index 4492b77..17cb67f 100644 (file)
@@ -604,7 +604,7 @@ static int bd70528_probe(struct platform_device *pdev)
                }
        }
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static const struct platform_device_id bd718x7_rtc_id[] = {
index 4fee57c..0366e2f 100644 (file)
@@ -252,7 +252,7 @@ static int brcmstb_waketmr_probe(struct platform_device *pdev)
        timer->rtc->ops = &brcmstb_waketmr_ops;
        timer->rtc->range_max = U32_MAX;
 
-       ret = rtc_register_device(timer->rtc);
+       ret = devm_rtc_register_device(timer->rtc);
        if (ret)
                goto err_notifier;
 
@@ -264,8 +264,7 @@ err_notifier:
        unregister_reboot_notifier(&timer->reboot_notifier);
 
 err_clk:
-       if (timer->clk)
-               clk_disable_unprepare(timer->clk);
+       clk_disable_unprepare(timer->clk);
 
        return ret;
 }
index 595d5d2..1edf7f1 100644 (file)
@@ -336,7 +336,7 @@ static int cdns_rtc_probe(struct platform_device *pdev)
        writel(0, crtc->regs + CDNS_RTC_HMR);
        writel(CDNS_RTC_KRTCR_KRTC, crtc->regs + CDNS_RTC_KRTCR);
 
-       ret = rtc_register_device(crtc->rtc_dev);
+       ret = devm_rtc_register_device(crtc->rtc_dev);
        if (ret)
                goto err_disable_wakeup;
 
index c5bcd2a..51e80bc 100644 (file)
@@ -863,8 +863,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
                cmos_rtc.rtc->ops = &cmos_rtc_ops_no_alarm;
        }
 
-       cmos_rtc.rtc->nvram_old_abi = true;
-       retval = rtc_register_device(cmos_rtc.rtc);
+       retval = devm_rtc_register_device(cmos_rtc.rtc);
        if (retval)
                goto cleanup2;
 
@@ -873,8 +872,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 
        /* export at least the first block of NVRAM */
        nvmem_cfg.size = address_space - NVRAM_OFFSET;
-       if (rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg))
-               dev_err(dev, "nvmem registration failed\n");
+       devm_rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg);
 
        dev_info(dev, "%s%s, %d bytes nvram%s\n",
                 !is_valid_irq(rtc_irq) ? "no alarms" :
index da59917..168ced8 100644 (file)
@@ -203,7 +203,7 @@ static int __init coh901331_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, rtap);
 
-       ret = rtc_register_device(rtap->rtc);
+       ret = devm_rtc_register_device(rtap->rtc);
        if (ret)
                goto out_no_rtc;
 
index 800667d..afc8fcb 100644 (file)
@@ -269,7 +269,8 @@ static int cpcap_rtc_probe(struct platform_device *pdev)
 
        rtc->alarm_irq = platform_get_irq(pdev, 0);
        err = devm_request_threaded_irq(dev, rtc->alarm_irq, NULL,
-                                       cpcap_rtc_alarm_irq, IRQF_TRIGGER_NONE,
+                                       cpcap_rtc_alarm_irq,
+                                       IRQF_TRIGGER_NONE | IRQF_ONESHOT,
                                        "rtc_alarm", rtc);
        if (err) {
                dev_err(dev, "Could not request alarm irq: %d\n", err);
@@ -285,7 +286,8 @@ static int cpcap_rtc_probe(struct platform_device *pdev)
         */
        rtc->update_irq = platform_get_irq(pdev, 1);
        err = devm_request_threaded_irq(dev, rtc->update_irq, NULL,
-                                       cpcap_rtc_update_irq, IRQF_TRIGGER_NONE,
+                                       cpcap_rtc_update_irq,
+                                       IRQF_TRIGGER_NONE | IRQF_ONESHOT,
                                        "rtc_1hz", rtc);
        if (err) {
                dev_err(dev, "Could not request update irq: %d\n", err);
@@ -299,7 +301,7 @@ static int cpcap_rtc_probe(struct platform_device *pdev)
                /* ignore error and continue without wakeup support */
        }
 
-       return rtc_register_device(rtc->rtc_dev);
+       return devm_rtc_register_device(rtc->rtc_dev);
 }
 
 static const struct of_device_id cpcap_rtc_of_match[] = {
index f7343c2..7062679 100644 (file)
@@ -350,7 +350,7 @@ static int cros_ec_rtc_probe(struct platform_device *pdev)
        cros_ec_rtc->rtc->ops = &cros_ec_rtc_ops;
        cros_ec_rtc->rtc->range_max = U32_MAX;
 
-       ret = rtc_register_device(cros_ec_rtc->rtc);
+       ret = devm_rtc_register_device(cros_ec_rtc->rtc);
        if (ret)
                return ret;
 
index 58de10d..9ca99bd 100644 (file)
@@ -304,7 +304,7 @@ static int da9052_rtc_probe(struct platform_device *pdev)
        rtc->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        rtc->rtc->range_max = RTC_TIMESTAMP_END_2063;
 
-       ret = rtc_register_device(rtc->rtc);
+       ret = devm_rtc_register_device(rtc->rtc);
        if (ret)
                return ret;
 
index 046b1d4..d4b72a9 100644 (file)
@@ -243,7 +243,7 @@ static int da9063_rtc_read_time(struct device *dev, struct rtc_time *tm)
        al_secs = rtc_tm_to_time64(&rtc->alarm_time);
 
        /* handle the rtc synchronisation delay */
-       if (rtc->rtc_sync == true && al_secs - tm_secs == 1)
+       if (rtc->rtc_sync && al_secs - tm_secs == 1)
                memcpy(tm, &rtc->alarm_time, sizeof(struct rtc_time));
        else
                rtc->rtc_sync = false;
@@ -494,7 +494,7 @@ static int da9063_rtc_probe(struct platform_device *pdev)
                dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n",
                        irq_alarm, ret);
 
-       return rtc_register_device(rtc->rtc_dev);
+       return devm_rtc_register_device(rtc->rtc_dev);
 }
 
 static struct platform_driver da9063_rtc_driver = {
index 73f87a1..6bef0f2 100644 (file)
@@ -484,7 +484,7 @@ static int __init davinci_rtc_probe(struct platform_device *pdev)
 
        device_init_wakeup(&pdev->dev, 0);
 
-       return rtc_register_device(davinci_rtc->rtc);
+       return devm_rtc_register_device(davinci_rtc->rtc);
 }
 
 static int __exit davinci_rtc_remove(struct platform_device *pdev)
index 200d85b..4fdfa5b 100644 (file)
@@ -202,7 +202,7 @@ static int __init dc_rtc_probe(struct platform_device *pdev)
        rtc->rtc_dev->ops = &dc_rtc_ops;
        rtc->rtc_dev->range_max = U32_MAX;
 
-       return rtc_register_device(rtc->rtc_dev);
+       return devm_rtc_register_device(rtc->rtc_dev);
 }
 
 static const struct of_device_id dc_dt_ids[] = {
index cd947a2..94fb16a 100644 (file)
@@ -132,7 +132,7 @@ static int dm355evm_rtc_probe(struct platform_device *pdev)
        rtc->ops = &dm355evm_rtc_ops;
        rtc->range_max = U32_MAX;
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 /*
index a3d7908..8c2ab29 100644 (file)
@@ -694,12 +694,11 @@ static int ds1305_probe(struct spi_device *spi)
        ds1305->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
        ds1305_nvmem_cfg.priv = ds1305;
-       ds1305->rtc->nvram_old_abi = true;
-       status = rtc_register_device(ds1305->rtc);
+       status = devm_rtc_register_device(ds1305->rtc);
        if (status)
                return status;
 
-       rtc_nvmem_register(ds1305->rtc, &ds1305_nvmem_cfg);
+       devm_rtc_nvmem_register(ds1305->rtc, &ds1305_nvmem_cfg);
 
        /* Maybe set up alarm IRQ; be ready to handle it triggering right
         * away.  NOTE that we don't share this.  The signal is active low,
index 9f5f54c..183cf7c 100644 (file)
@@ -8,12 +8,12 @@
  *  Copyright (C) 2012 Bertrand Achard (nvram access fixes)
  */
 
-#include <linux/acpi.h>
 #include <linux/bcd.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/property.h>
 #include <linux/rtc/ds1307.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
@@ -31,6 +31,7 @@
  * That's a natural job for a factory or repair bench.
  */
 enum ds_type {
+       unknown_ds_type, /* always first and 0 */
        ds_1307,
        ds_1308,
        ds_1337,
@@ -1090,7 +1091,6 @@ static const struct i2c_device_id ds1307_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ds1307_id);
 
-#ifdef CONFIG_OF
 static const struct of_device_id ds1307_of_match[] = {
        {
                .compatible = "dallas,ds1307",
@@ -1167,32 +1167,6 @@ static const struct of_device_id ds1307_of_match[] = {
        { }
 };
 MODULE_DEVICE_TABLE(of, ds1307_of_match);
-#endif
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id ds1307_acpi_ids[] = {
-       { .id = "DS1307", .driver_data = ds_1307 },
-       { .id = "DS1308", .driver_data = ds_1308 },
-       { .id = "DS1337", .driver_data = ds_1337 },
-       { .id = "DS1338", .driver_data = ds_1338 },
-       { .id = "DS1339", .driver_data = ds_1339 },
-       { .id = "DS1388", .driver_data = ds_1388 },
-       { .id = "DS1340", .driver_data = ds_1340 },
-       { .id = "DS1341", .driver_data = ds_1341 },
-       { .id = "DS3231", .driver_data = ds_3231 },
-       { .id = "M41T0", .driver_data = m41t0 },
-       { .id = "M41T00", .driver_data = m41t00 },
-       { .id = "M41T11", .driver_data = m41t11 },
-       { .id = "MCP7940X", .driver_data = mcp794xx },
-       { .id = "MCP7941X", .driver_data = mcp794xx },
-       { .id = "PT7C4338", .driver_data = ds_1307 },
-       { .id = "RX8025", .driver_data = rx_8025 },
-       { .id = "ISL12057", .driver_data = ds_1337 },
-       { .id = "RX8130", .driver_data = rx_8130 },
-       { }
-};
-MODULE_DEVICE_TABLE(acpi, ds1307_acpi_ids);
-#endif
 
 /*
  * The ds1337 and ds1339 both have two alarms, but we only use the first
@@ -1626,13 +1600,16 @@ static const struct clk_ops ds3231_clk_32khz_ops = {
        .recalc_rate = ds3231_clk_32khz_recalc_rate,
 };
 
+static const char *ds3231_clks_names[] = {
+       [DS3231_CLK_SQW] = "ds3231_clk_sqw",
+       [DS3231_CLK_32KHZ] = "ds3231_clk_32khz",
+};
+
 static struct clk_init_data ds3231_clks_init[] = {
        [DS3231_CLK_SQW] = {
-               .name = "ds3231_clk_sqw",
                .ops = &ds3231_clk_sqw_ops,
        },
        [DS3231_CLK_32KHZ] = {
-               .name = "ds3231_clk_32khz",
                .ops = &ds3231_clk_32khz_ops,
        },
 };
@@ -1653,6 +1630,11 @@ static int ds3231_clks_register(struct ds1307 *ds1307)
        if (!onecell->clks)
                return -ENOMEM;
 
+       /* optional override of the clock names */
+       device_property_read_string_array(ds1307->dev, "clock-output-names",
+                                         ds3231_clks_names,
+                                         ARRAY_SIZE(ds3231_clks_names));
+
        for (i = 0; i < ARRAY_SIZE(ds3231_clks_init); i++) {
                struct clk_init_data init = ds3231_clks_init[i];
 
@@ -1663,9 +1645,7 @@ static int ds3231_clks_register(struct ds1307 *ds1307)
                if (i == DS3231_CLK_SQW && test_bit(HAS_ALARM, &ds1307->flags))
                        continue;
 
-               /* optional override of the clockname */
-               of_property_read_string_index(node, "clock-output-names", i,
-                                             &init.name);
+               init.name = ds3231_clks_names[i];
                ds1307->clks[i].init = &init;
 
                onecell->clks[i] = devm_clk_register(ds1307->dev,
@@ -1674,10 +1654,8 @@ static int ds3231_clks_register(struct ds1307 *ds1307)
                        return PTR_ERR(onecell->clks[i]);
        }
 
-       if (!node)
-               return 0;
-
-       of_clk_add_provider(node, of_clk_src_onecell_get, onecell);
+       if (node)
+               of_clk_add_provider(node, of_clk_src_onecell_get, onecell);
 
        return 0;
 }
@@ -1761,6 +1739,7 @@ static int ds1307_probe(struct i2c_client *client,
                        const struct i2c_device_id *id)
 {
        struct ds1307           *ds1307;
+       const void              *match;
        int                     err = -ENODEV;
        int                     tmp;
        const struct chip_desc  *chip;
@@ -1786,22 +1765,15 @@ static int ds1307_probe(struct i2c_client *client,
 
        i2c_set_clientdata(client, ds1307);
 
-       if (client->dev.of_node) {
-               ds1307->type = (enum ds_type)
-                       of_device_get_match_data(&client->dev);
+       match = device_get_match_data(&client->dev);
+       if (match) {
+               ds1307->type = (enum ds_type)match;
                chip = &chips[ds1307->type];
        } else if (id) {
                chip = &chips[id->driver_data];
                ds1307->type = id->driver_data;
        } else {
-               const struct acpi_device_id *acpi_id;
-
-               acpi_id = acpi_match_device(ACPI_PTR(ds1307_acpi_ids),
-                                           ds1307->dev);
-               if (!acpi_id)
-                       return -ENODEV;
-               chip = &chips[acpi_id->driver_data];
-               ds1307->type = acpi_id->driver_data;
+               return -ENODEV;
        }
 
        want_irq = client->irq > 0 && chip->alarm;
@@ -1819,7 +1791,6 @@ static int ds1307_probe(struct i2c_client *client,
                             trickle_charger_setup);
        }
 
-#ifdef CONFIG_OF
 /*
  * For devices with no IRQ directly connected to the SoC, the RTC chip
  * can be forced as a wakeup source by stating that explicitly in
@@ -1828,10 +1799,8 @@ static int ds1307_probe(struct i2c_client *client,
  * This will guarantee the 'wakealarm' sysfs entry is available on the device,
  * if supported by the RTC.
  */
-       if (chip->alarm && of_property_read_bool(client->dev.of_node,
-                                                "wakeup-source"))
+       if (chip->alarm && device_property_read_bool(&client->dev, "wakeup-source"))
                ds1307_can_wakeup_device = true;
-#endif
 
        switch (ds1307->type) {
        case ds_1337:
@@ -2032,7 +2001,7 @@ static int ds1307_probe(struct i2c_client *client,
        if (err)
                return err;
 
-       err = rtc_register_device(ds1307->rtc);
+       err = devm_rtc_register_device(ds1307->rtc);
        if (err)
                return err;
 
@@ -2047,8 +2016,7 @@ static int ds1307_probe(struct i2c_client *client,
                        .priv = ds1307,
                };
 
-               ds1307->rtc->nvram_old_abi = true;
-               rtc_nvmem_register(ds1307->rtc, &nvmem_cfg);
+               devm_rtc_nvmem_register(ds1307->rtc, &nvmem_cfg);
        }
 
        ds1307_hwmon_register(ds1307);
@@ -2064,8 +2032,7 @@ exit:
 static struct i2c_driver ds1307_driver = {
        .driver = {
                .name   = "rtc-ds1307",
-               .of_match_table = of_match_ptr(ds1307_of_match),
-               .acpi_match_table = ACPI_PTR(ds1307_acpi_ids),
+               .of_match_table = ds1307_of_match,
        },
        .probe          = ds1307_probe,
        .id_table       = ds1307_id,
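The clock-name override above loads defaults first and lets firmware overwrite them: device_property_read_string_array() fills in however many entries the property supplies and, if the property is absent, errors out without touching the array, so the compiled-in names survive as the fallback. A short sketch of that default-then-override idiom (property name taken from the code above, everything else illustrative):

#include <linux/property.h>

static void foo_get_clk_names(struct device *dev)
{
	/* compiled-in defaults, kept unless firmware overrides them */
	static const char *names[2] = { "foo_clk_a", "foo_clk_b" };

	/*
	 * Present entries are overwritten in place; a missing property
	 * returns an error and leaves the defaults untouched, so no
	 * explicit fallback branch is needed.
	 */
	device_property_read_string_array(dev, "clock-output-names",
					  names, ARRAY_SIZE(names));
}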
index ba14342..f14ed6c 100644 (file)
@@ -399,7 +399,6 @@ static int ds1343_probe(struct spi_device *spi)
        if (IS_ERR(priv->rtc))
                return PTR_ERR(priv->rtc);
 
-       priv->rtc->nvram_old_abi = true;
        priv->rtc->ops = &ds1343_rtc_ops;
        priv->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        priv->rtc->range_max = RTC_TIMESTAMP_END_2099;
@@ -409,12 +408,12 @@ static int ds1343_probe(struct spi_device *spi)
                dev_err(&spi->dev,
                        "unable to create sysfs entries for rtc ds1343\n");
 
-       res = rtc_register_device(priv->rtc);
+       res = devm_rtc_register_device(priv->rtc);
        if (res)
                return res;
 
        nvmem_cfg.priv = priv;
-       rtc_nvmem_register(priv->rtc, &nvmem_cfg);
+       devm_rtc_nvmem_register(priv->rtc, &nvmem_cfg);
 
        priv->irq = spi->irq;
 
index 7025cf3..157bf52 100644 (file)
@@ -166,7 +166,7 @@ static int ds1347_probe(struct spi_device *spi)
        rtc->range_min = RTC_TIMESTAMP_BEGIN_0000;
        rtc->range_max = RTC_TIMESTAMP_END_9999;
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static struct spi_driver ds1347_driver = {
index 177d870..fab7992 100644 (file)
@@ -508,7 +508,7 @@ static int ds1374_probe(struct i2c_client *client,
        ds1374->rtc->ops = &ds1374_rtc_ops;
        ds1374->rtc->range_max = U32_MAX;
 
-       ret = rtc_register_device(ds1374->rtc);
+       ret = devm_rtc_register_device(ds1374->rtc);
        if (ret)
                return ret;
 
index a63872c..bda8843 100644 (file)
@@ -466,13 +466,11 @@ static int ds1511_rtc_probe(struct platform_device *pdev)
 
        pdata->rtc->ops = &ds1511_rtc_ops;
 
-       pdata->rtc->nvram_old_abi = true;
-
-       ret = rtc_register_device(pdata->rtc);
+       ret = devm_rtc_register_device(pdata->rtc);
        if (ret)
                return ret;
 
-       rtc_nvmem_register(pdata->rtc, &ds1511_nvmem_cfg);
+       devm_rtc_nvmem_register(pdata->rtc, &ds1511_nvmem_cfg);
 
        /*
         * if the platform has an interrupt in mind for this device,
index cdf5e05..dbff5b6 100644 (file)
@@ -294,9 +294,8 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(pdata->rtc);
 
        pdata->rtc->ops = &ds1553_rtc_ops;
-       pdata->rtc->nvram_old_abi = true;
 
-       ret = rtc_register_device(pdata->rtc);
+       ret = devm_rtc_register_device(pdata->rtc);
        if (ret)
                return ret;
 
@@ -310,8 +309,7 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
                }
        }
 
-       if (rtc_nvmem_register(pdata->rtc, &nvmem_cfg))
-               dev_err(&pdev->dev, "unable to register nvmem\n");
+       devm_rtc_nvmem_register(pdata->rtc, &nvmem_cfg);
 
        return 0;
 }
index 9da84df..6304937 100644 (file)
@@ -124,7 +124,7 @@ static int ds1672_probe(struct i2c_client *client,
        rtc->ops = &ds1672_rtc_ops;
        rtc->range_max = U32_MAX;
 
-       err = rtc_register_device(rtc);
+       err = devm_rtc_register_device(rtc);
        if (err)
                return err;
 
index dfbd7b8..d69c807 100644 (file)
@@ -1316,13 +1316,12 @@ ds1685_rtc_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       rtc_dev->nvram_old_abi = true;
        nvmem_cfg.priv = rtc;
-       ret = rtc_nvmem_register(rtc_dev, &nvmem_cfg);
+       ret = devm_rtc_nvmem_register(rtc_dev, &nvmem_cfg);
        if (ret)
                return ret;
 
-       return rtc_register_device(rtc_dev);
+       return devm_rtc_register_device(rtc_dev);
 }
 
 /**
index 2b949f0..13d45c6 100644 (file)
@@ -190,14 +190,12 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(rtc);
 
        rtc->ops = &ds1742_rtc_ops;
-       rtc->nvram_old_abi = true;
 
-       ret = rtc_register_device(rtc);
+       ret = devm_rtc_register_device(rtc);
        if (ret)
                return ret;
 
-       if (rtc_nvmem_register(rtc, &nvmem_cfg))
-               dev_err(&pdev->dev, "Unable to register nvmem\n");
+       devm_rtc_nvmem_register(rtc, &nvmem_cfg);
 
        return 0;
 }
index 9df0c44..0480f59 100644 (file)
@@ -234,7 +234,7 @@ static int rtc_probe(struct platform_device *pdev)
        chip->rtc->ops = &ds2404_rtc_ops;
        chip->rtc->range_max = U32_MAX;
 
-       retval = rtc_register_device(chip->rtc);
+       retval = devm_rtc_register_device(chip->rtc);
        if (retval)
                return retval;
 
index 69c37ab..16b8903 100644 (file)
@@ -518,7 +518,7 @@ static int ds3232_probe(struct device *dev, struct regmap *regmap, int irq,
        if (IS_ERR(ds3232->rtc))
                return PTR_ERR(ds3232->rtc);
 
-       ret = rtc_nvmem_register(ds3232->rtc, &nvmem_cfg);
+       ret = devm_rtc_nvmem_register(ds3232->rtc, &nvmem_cfg);
        if (ret)
                return ret;
 
index 8ec9ea1..acae7f1 100644 (file)
@@ -33,7 +33,7 @@ struct ep93xx_rtc {
 static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload,
                                 unsigned short *delete)
 {
-       struct ep93xx_rtc *ep93xx_rtc = dev_get_platdata(dev);
+       struct ep93xx_rtc *ep93xx_rtc = dev_get_drvdata(dev);
        unsigned long comp;
 
        comp = readl(ep93xx_rtc->mmio_base + EP93XX_RTC_SWCOMP);
@@ -51,7 +51,7 @@ static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload,
 
 static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-       struct ep93xx_rtc *ep93xx_rtc = dev_get_platdata(dev);
+       struct ep93xx_rtc *ep93xx_rtc = dev_get_drvdata(dev);
        unsigned long time;
 
        time = readl(ep93xx_rtc->mmio_base + EP93XX_RTC_DATA);
@@ -62,7 +62,7 @@ static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 static int ep93xx_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
-       struct ep93xx_rtc *ep93xx_rtc = dev_get_platdata(dev);
+       struct ep93xx_rtc *ep93xx_rtc = dev_get_drvdata(dev);
        unsigned long secs = rtc_tm_to_time64(tm);
 
        writel(secs + 1, ep93xx_rtc->mmio_base + EP93XX_RTC_LOAD);
@@ -145,7 +145,7 @@ static int ep93xx_rtc_probe(struct platform_device *pdev)
        if (err)
                return err;
 
-       return rtc_register_device(ep93xx_rtc->rtc);
+       return devm_rtc_register_device(ep93xx_rtc->rtc);
 }
 
 static struct platform_driver ep93xx_rtc_driver = {
index 48d3b38..57cc09d 100644 (file)
@@ -290,7 +290,7 @@ static int ftm_rtc_probe(struct platform_device *pdev)
        if (ret)
                dev_err(&pdev->dev, "failed to enable irq wake\n");
 
-       ret = rtc_register_device(rtc->rtc_dev);
+       ret = devm_rtc_register_device(rtc->rtc_dev);
        if (ret) {
                dev_err(&pdev->dev, "can't register rtc device\n");
                return ret;
index 0919f7d..ad3add5 100644 (file)
@@ -176,7 +176,7 @@ static int ftrtc010_rtc_probe(struct platform_device *pdev)
        if (unlikely(ret))
                return ret;
 
-       return rtc_register_device(rtc->rtc_dev);
+       return devm_rtc_register_device(rtc->rtc_dev);
 }
 
 static int ftrtc010_rtc_remove(struct platform_device *pdev)
index 6349d2c..7ab95d0 100644 (file)
@@ -194,7 +194,7 @@ static int goldfish_rtc_probe(struct platform_device *pdev)
        if (err)
                return err;
 
-       return rtc_register_device(rtcdrv->rtc);
+       return devm_rtc_register_device(rtcdrv->rtc);
 }
 
 static const struct of_device_id goldfish_rtc_of_match[] = {
index 0fb79c4..24e0095 100644 (file)
@@ -527,8 +527,6 @@ static int hym8563_probe(struct i2c_client *client,
        hym8563->client = client;
        i2c_set_clientdata(client, hym8563);
 
-       device_set_wakeup_capable(&client->dev, true);
-
        ret = hym8563_init_device(client);
        if (ret) {
                dev_err(&client->dev, "could not init device, %d\n", ret);
@@ -547,6 +545,11 @@ static int hym8563_probe(struct i2c_client *client,
                }
        }
 
+       if (client->irq > 0 ||
+           device_property_read_bool(&client->dev, "wakeup-source")) {
+               device_init_wakeup(&client->dev, true);
+       }
+
        /* check state of calendar information */
        ret = i2c_smbus_read_byte_data(client, HYM8563_SEC);
        if (ret < 0)
index a5f59e6..cc9fbab 100644 (file)
@@ -166,7 +166,7 @@ static int imx_sc_rtc_probe(struct platform_device *pdev)
        imx_sc_rtc->range_min = 0;
        imx_sc_rtc->range_max = U32_MAX;
 
-       ret = rtc_register_device(imx_sc_rtc);
+       ret = devm_rtc_register_device(imx_sc_rtc);
        if (ret)
                return ret;
 
index 8d141d8..c2692da 100644 (file)
@@ -814,7 +814,7 @@ static int __init dryice_rtc_probe(struct platform_device *pdev)
        imxdi->rtc->ops = &dryice_rtc_ops;
        imxdi->rtc->range_max = U32_MAX;
 
-       rc = rtc_register_device(imxdi->rtc);
+       rc = devm_rtc_register_device(imxdi->rtc);
        if (rc)
                goto err;
 
index 5b6b17f..1fc6627 100644 (file)
@@ -465,11 +465,11 @@ static int isl12026_probe_new(struct i2c_client *client)
 
        priv->rtc->ops = &isl12026_rtc_ops;
        nvm_cfg.priv = priv;
-       ret = rtc_nvmem_register(priv->rtc, &nvm_cfg);
+       ret = devm_rtc_nvmem_register(priv->rtc, &nvm_cfg);
        if (ret)
                return ret;
 
-       return rtc_register_device(priv->rtc);
+       return devm_rtc_register_device(priv->rtc);
 }
 
 static int isl12026_remove(struct i2c_client *client)
index ebb691f..563a6d9 100644 (file)
@@ -890,11 +890,11 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id)
        if (rc)
                return rc;
 
-       rc = rtc_nvmem_register(isl1208->rtc, &isl1208->nvmem_config);
+       rc = devm_rtc_nvmem_register(isl1208->rtc, &isl1208->nvmem_config);
        if (rc)
                return rc;
 
-       return rtc_register_device(isl1208->rtc);
+       return devm_rtc_register_device(isl1208->rtc);
 }
 
 static struct i2c_driver isl1208_driver = {
index 9607e6b..6e51df7 100644 (file)
@@ -375,7 +375,7 @@ static int jz4740_rtc_probe(struct platform_device *pdev)
        /* Each 1 Hz pulse should happen after (rate) ticks */
        jz4740_rtc_reg_write(rtc, JZ_REG_RTC_REGULATOR, rate - 1);
 
-       ret = rtc_register_device(rtc->rtc);
+       ret = devm_rtc_register_device(rtc->rtc);
        if (ret)
                return ret;
 
index 15d8abd..76ad703 100644 (file)
@@ -239,7 +239,7 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev)
        rtc->rtc->ops = &lpc32xx_rtc_ops;
        rtc->rtc->range_max = U32_MAX;
 
-       err = rtc_register_device(rtc->rtc);
+       err = devm_rtc_register_device(rtc->rtc);
        if (err)
                return err;
 
index 8bd3405..5af26dc 100644 (file)
@@ -176,7 +176,7 @@ static int ls1x_rtc_probe(struct platform_device *pdev)
        rtcdev->range_min = RTC_TIMESTAMP_BEGIN_1900;
        rtcdev->range_max = RTC_TIMESTAMP_END_2099;
 
-       return rtc_register_device(rtcdev);
+       return devm_rtc_register_device(rtcdev);
 }
 
 static struct platform_driver  ls1x_rtc_driver = {
index 8a89bc5..160dcf6 100644 (file)
@@ -977,7 +977,7 @@ static int m41t80_probe(struct i2c_client *client,
                m41t80_sqw_register_clk(m41t80_data);
 #endif
 
-       rc = rtc_register_device(m41t80_data->rtc);
+       rc = devm_rtc_register_device(m41t80_data->rtc);
        if (rc)
                return rc;
 
index 67e2187..5f5898d 100644 (file)
@@ -463,15 +463,14 @@ static int m48t59_rtc_probe(struct platform_device *pdev)
        if (IS_ERR(m48t59->rtc))
                return PTR_ERR(m48t59->rtc);
 
-       m48t59->rtc->nvram_old_abi = true;
        m48t59->rtc->ops = ops;
 
        nvmem_cfg.size = pdata->offset;
-       ret = rtc_nvmem_register(m48t59->rtc, &nvmem_cfg);
+       ret = devm_rtc_nvmem_register(m48t59->rtc, &nvmem_cfg);
        if (ret)
                return ret;
 
-       ret = rtc_register_device(m48t59->rtc);
+       ret = devm_rtc_register_device(m48t59->rtc);
        if (ret)
                return ret;
 
index 75a0e73..481c952 100644 (file)
@@ -254,13 +254,12 @@ static int m48t86_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(info->rtc);
 
        info->rtc->ops = &m48t86_rtc_ops;
-       info->rtc->nvram_old_abi = true;
 
-       err = rtc_register_device(info->rtc);
+       err = devm_rtc_register_device(info->rtc);
        if (err)
                return err;
 
-       rtc_nvmem_register(info->rtc, &m48t86_nvmem_cfg);
+       devm_rtc_nvmem_register(info->rtc, &m48t86_nvmem_cfg);
 
        /* read battery status */
        reg = m48t86_readb(&pdev->dev, M48T86_D);
index d6802e6..d4234e7 100644 (file)
@@ -307,7 +307,7 @@ static int __init mc13xxx_rtc_probe(struct platform_device *pdev)
 
        mc13xxx_unlock(mc13xxx);
 
-       ret = rtc_register_device(priv->rtc);
+       ret = devm_rtc_register_device(priv->rtc);
        if (ret) {
                mc13xxx_lock(mc13xxx);
                goto err_irq_request;
index e6bd080..1463c86 100644 (file)
@@ -83,7 +83,7 @@ static int meson_vrtc_probe(struct platform_device *pdev)
                return PTR_ERR(vrtc->rtc);
 
        vrtc->rtc->ops = &meson_vrtc_ops;
-       return rtc_register_device(vrtc->rtc);
+       return devm_rtc_register_device(vrtc->rtc);
 }
 
 static int __maybe_unused meson_vrtc_suspend(struct device *dev)
index 47ebcf8..8642c06 100644 (file)
@@ -365,11 +365,11 @@ static int meson_rtc_probe(struct platform_device *pdev)
        }
 
        meson_rtc_nvmem_config.priv = rtc;
-       ret = rtc_nvmem_register(rtc->rtc, &meson_rtc_nvmem_config);
+       ret = devm_rtc_nvmem_register(rtc->rtc, &meson_rtc_nvmem_config);
        if (ret)
                goto out_disable_vdd;
 
-       ret = rtc_register_device(rtc->rtc);
+       ret = devm_rtc_register_device(rtc->rtc);
        if (ret)
                goto out_disable_vdd;
 
index 5c2ce71..bb2ea9b 100644 (file)
@@ -371,7 +371,7 @@ static int mpc5121_rtc_probe(struct platform_device *op)
                rtc->rtc->range_max = U32_MAX;
        }
 
-       err = rtc_register_device(rtc->rtc);
+       err = devm_rtc_register_device(rtc->rtc);
        if (err)
                goto out_dispose2;
 
index 17bf539..421b3b6 100644 (file)
@@ -361,7 +361,7 @@ static int vrtc_mrst_do_probe(struct device *dev, struct resource *iomem,
                }
        }
 
-       retval = rtc_register_device(mrst_rtc.rtc);
+       retval = devm_rtc_register_device(mrst_rtc.rtc);
        if (retval)
                goto cleanup0;
 
index d5f691c..cd92a97 100644 (file)
@@ -352,7 +352,7 @@ static int mt2712_rtc_probe(struct platform_device *pdev)
        mt2712_rtc->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        mt2712_rtc->rtc->range_max = MT2712_RTC_TIMESTAMP_END_2127;
 
-       return rtc_register_device(mt2712_rtc->rtc);
+       return devm_rtc_register_device(mt2712_rtc->rtc);
 }
 
 #ifdef CONFIG_PM_SLEEP
index 1894ade..6655035 100644 (file)
@@ -301,7 +301,7 @@ static int mtk_rtc_probe(struct platform_device *pdev)
 
        rtc->rtc_dev->ops = &mtk_rtc_ops;
 
-       return rtc_register_device(rtc->rtc_dev);
+       return devm_rtc_register_device(rtc->rtc_dev);
 }
 
 #ifdef CONFIG_PM_SLEEP
index d5f190e..f8e2ece 100644 (file)
@@ -278,7 +278,7 @@ static int __init mv_rtc_probe(struct platform_device *pdev)
        pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        pdata->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
-       ret = rtc_register_device(pdata->rtc);
+       ret = devm_rtc_register_device(pdata->rtc);
        if (!ret)
                return 0;
 out:
index a8cfbde..65b29b0 100644 (file)
@@ -70,27 +70,12 @@ struct rtc_plat_data {
        enum imx_rtc_type devtype;
 };
 
-static const struct platform_device_id imx_rtc_devtype[] = {
-       {
-               .name = "imx1-rtc",
-               .driver_data = IMX1_RTC,
-       }, {
-               .name = "imx21-rtc",
-               .driver_data = IMX21_RTC,
-       }, {
-               /* sentinel */
-       }
-};
-MODULE_DEVICE_TABLE(platform, imx_rtc_devtype);
-
-#ifdef CONFIG_OF
 static const struct of_device_id imx_rtc_dt_ids[] = {
        { .compatible = "fsl,imx1-rtc", .data = (const void *)IMX1_RTC },
        { .compatible = "fsl,imx21-rtc", .data = (const void *)IMX21_RTC },
        {}
 };
 MODULE_DEVICE_TABLE(of, imx_rtc_dt_ids);
-#endif
 
 static inline int is_imx1_rtc(struct rtc_plat_data *data)
 {
@@ -322,17 +307,12 @@ static int mxc_rtc_probe(struct platform_device *pdev)
        u32 reg;
        unsigned long rate;
        int ret;
-       const struct of_device_id *of_id;
 
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
                return -ENOMEM;
 
-       of_id = of_match_device(imx_rtc_dt_ids, &pdev->dev);
-       if (of_id)
-               pdata->devtype = (enum imx_rtc_type)of_id->data;
-       else
-               pdata->devtype = pdev->id_entry->driver_data;
+       pdata->devtype = (enum imx_rtc_type)of_device_get_match_data(&pdev->dev);
 
        pdata->ioaddr = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(pdata->ioaddr))
@@ -428,7 +408,7 @@ static int mxc_rtc_probe(struct platform_device *pdev)
                        dev_err(&pdev->dev, "failed to enable irq wake\n");
        }
 
-       ret = rtc_register_device(rtc);
+       ret = devm_rtc_register_device(rtc);
 
        return ret;
 }
@@ -438,7 +418,6 @@ static struct platform_driver mxc_rtc_driver = {
                   .name        = "mxc_rtc",
                   .of_match_table = of_match_ptr(imx_rtc_dt_ids),
        },
-       .id_table = imx_rtc_devtype,
        .probe = mxc_rtc_probe,
 };
 
index 9153456..0d73f6f 100644 (file)
@@ -354,7 +354,7 @@ static int mxc_rtc_probe(struct platform_device *pdev)
                return ret;
        }
 
-       ret = rtc_register_device(pdata->rtc);
+       ret = devm_rtc_register_device(pdata->rtc);
        if (ret < 0)
                clk_unprepare(pdata->clk);
 
index c20fc79..dc7db24 100644 (file)
@@ -879,18 +879,18 @@ static int omap_rtc_probe(struct platform_device *pdev)
        /* Support ext_wakeup pinconf */
        rtc_pinctrl_desc.name = dev_name(&pdev->dev);
 
-       rtc->pctldev = pinctrl_register(&rtc_pinctrl_desc, &pdev->dev, rtc);
+       rtc->pctldev = devm_pinctrl_register(&pdev->dev, &rtc_pinctrl_desc, rtc);
        if (IS_ERR(rtc->pctldev)) {
                dev_err(&pdev->dev, "Couldn't register pinctrl driver\n");
                ret = PTR_ERR(rtc->pctldev);
                goto err;
        }
 
-       ret = rtc_register_device(rtc->rtc);
+       ret = devm_rtc_register_device(rtc->rtc);
        if (ret)
-               goto err_deregister_pinctrl;
+               goto err;
 
-       rtc_nvmem_register(rtc->rtc, &omap_rtc_nvmem_config);
+       devm_rtc_nvmem_register(rtc->rtc, &omap_rtc_nvmem_config);
 
        if (rtc->is_pmic_controller) {
                if (!pm_power_off) {
@@ -901,8 +901,6 @@ static int omap_rtc_probe(struct platform_device *pdev)
 
        return 0;
 
-err_deregister_pinctrl:
-       pinctrl_unregister(rtc->pctldev);
 err:
        clk_disable_unprepare(rtc->clk);
        device_init_wakeup(&pdev->dev, false);
@@ -945,9 +943,6 @@ static int omap_rtc_remove(struct platform_device *pdev)
        pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
 
-       /* Remove ext_wakeup pinconf */
-       pinctrl_unregister(rtc->pctldev);
-
        return 0;
 }
 
index 178bfb1..8c7a98a 100644 (file)
@@ -163,7 +163,7 @@ static int __init pcap_rtc_probe(struct platform_device *pdev)
        if (err)
                return err;
 
-       return rtc_register_device(pcap_rtc->rtc);
+       return devm_rtc_register_device(pcap_rtc->rtc);
 }
 
 static int __exit pcap_rtc_remove(struct platform_device *pdev)
index c3691fa..534ffc9 100644 (file)
@@ -434,7 +434,7 @@ static int pcf2123_probe(struct spi_device *spi)
        rtc->range_max = RTC_TIMESTAMP_END_2099;
        rtc->set_start_time = true;
 
-       ret = rtc_register_device(rtc);
+       ret = devm_rtc_register_device(rtc);
        if (ret)
                return ret;
 
index 07a5630..39a7b51 100644 (file)
@@ -243,10 +243,8 @@ static int pcf2127_nvmem_read(void *priv, unsigned int offset,
        if (ret)
                return ret;
 
-       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_RAM_RD_CMD,
-                              val, bytes);
-
-       return ret ?: bytes;
+       return regmap_bulk_read(pcf2127->regmap, PCF2127_REG_RAM_RD_CMD,
+                               val, bytes);
 }
 
 static int pcf2127_nvmem_write(void *priv, unsigned int offset,
@@ -261,10 +259,8 @@ static int pcf2127_nvmem_write(void *priv, unsigned int offset,
        if (ret)
                return ret;
 
-       ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_WRT_CMD,
-                               val, bytes);
-
-       return ret ?: bytes;
+       return regmap_bulk_write(pcf2127->regmap, PCF2127_REG_RAM_WRT_CMD,
+                                val, bytes);
 }
 
 /* watchdog driver */
@@ -335,6 +331,37 @@ static const struct watchdog_ops pcf2127_watchdog_ops = {
        .set_timeout = pcf2127_wdt_set_timeout,
 };
 
+static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127)
+{
+       u32 wdd_timeout;
+       int ret;
+
+       if (!IS_ENABLED(CONFIG_WATCHDOG) ||
+           !device_property_read_bool(dev, "reset-source"))
+               return 0;
+
+       pcf2127->wdd.parent = dev;
+       pcf2127->wdd.info = &pcf2127_wdt_info;
+       pcf2127->wdd.ops = &pcf2127_watchdog_ops;
+       pcf2127->wdd.min_timeout = PCF2127_WD_VAL_MIN;
+       pcf2127->wdd.max_timeout = PCF2127_WD_VAL_MAX;
+       pcf2127->wdd.timeout = PCF2127_WD_VAL_DEFAULT;
+       pcf2127->wdd.min_hw_heartbeat_ms = 500;
+       pcf2127->wdd.status = WATCHDOG_NOWAYOUT_INIT_STATUS;
+
+       watchdog_set_drvdata(&pcf2127->wdd, pcf2127);
+
+       /* Test whether the watchdog timer was started by the bootloader */
+       ret = regmap_read(pcf2127->regmap, PCF2127_REG_WD_VAL, &wdd_timeout);
+       if (ret)
+               return ret;
+
+       if (wdd_timeout)
+               set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status);
+
+       return devm_watchdog_register_device(dev, &pcf2127->wdd);
+}
+
 /* Alarm */
 static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
@@ -536,7 +563,6 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                         int alarm_irq, const char *name, bool has_nvmem)
 {
        struct pcf2127 *pcf2127;
-       u32 wdd_timeout;
        int ret = 0;
 
        dev_dbg(dev, "%s\n", __func__);
@@ -575,17 +601,6 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                pcf2127->rtc->ops = &pcf2127_rtc_alrm_ops;
        }
 
-       pcf2127->wdd.parent = dev;
-       pcf2127->wdd.info = &pcf2127_wdt_info;
-       pcf2127->wdd.ops = &pcf2127_watchdog_ops;
-       pcf2127->wdd.min_timeout = PCF2127_WD_VAL_MIN;
-       pcf2127->wdd.max_timeout = PCF2127_WD_VAL_MAX;
-       pcf2127->wdd.timeout = PCF2127_WD_VAL_DEFAULT;
-       pcf2127->wdd.min_hw_heartbeat_ms = 500;
-       pcf2127->wdd.status = WATCHDOG_NOWAYOUT_INIT_STATUS;
-
-       watchdog_set_drvdata(&pcf2127->wdd, pcf2127);
-
        if (has_nvmem) {
                struct nvmem_config nvmem_cfg = {
                        .priv = pcf2127,
@@ -594,7 +609,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                        .size = 512,
                };
 
-               ret = rtc_nvmem_register(pcf2127->rtc, &nvmem_cfg);
+               ret = devm_rtc_nvmem_register(pcf2127->rtc, &nvmem_cfg);
        }
 
        /*
@@ -615,19 +630,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                return ret;
        }
 
-       /* Test if watchdog timer is started by bootloader */
-       ret = regmap_read(pcf2127->regmap, PCF2127_REG_WD_VAL, &wdd_timeout);
-       if (ret)
-               return ret;
-
-       if (wdd_timeout)
-               set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status);
-
-#ifdef CONFIG_WATCHDOG
-       ret = devm_watchdog_register_device(dev, &pcf2127->wdd);
-       if (ret)
-               return ret;
-#endif /* CONFIG_WATCHDOG */
+       pcf2127_watchdog_init(dev, pcf2127);
 
        /*
         * Disable battery low/switch-over timestamp and interrupts.
@@ -680,7 +683,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                return ret;
        }
 
-       return rtc_register_device(pcf2127->rtc);
+       return devm_rtc_register_device(pcf2127->rtc);
 }
 
 #ifdef CONFIG_OF
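Folding the watchdog setup into pcf2127_watchdog_init() also swaps the old #ifdef CONFIG_WATCHDOG block for IS_ENABLED(), which evaluates to a compile-time 0/1 (true for =y or =m): the guarded code is still parsed and type-checked in every configuration but optimized away when the option is off. The general shape, with a hypothetical option:

static int foo_optional_init(struct device *dev)
{
	/*
	 * IS_ENABLED() folds to a constant, so unlike #ifdef the body
	 * below always compiles, catching bitrot in unusual configs,
	 * yet costs nothing when CONFIG_FOO is disabled.
	 */
	if (!IS_ENABLED(CONFIG_FOO))
		return 0;

	/* ... CONFIG_FOO-specific setup would go here ... */
	return 0;
}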
index f8b99cb..e19cf2a 100644 (file)
@@ -607,14 +607,14 @@ static int pcf85063_probe(struct i2c_client *client)
        }
 
        nvmem_cfg.priv = pcf85063->regmap;
-       rtc_nvmem_register(pcf85063->rtc, &nvmem_cfg);
+       devm_rtc_nvmem_register(pcf85063->rtc, &nvmem_cfg);
 
 #ifdef CONFIG_COMMON_CLK
        /* register clk in common clk framework */
        pcf85063_clkout_register_clk(pcf85063);
 #endif
 
-       return rtc_register_device(pcf85063->rtc);
+       return devm_rtc_register_device(pcf85063->rtc);
 }
 
 #ifdef CONFIG_OF
index 57d351d..5e1e7b2 100644 (file)
 #define DRIVER_NAME "rtc-pcf8523"
 
 #define REG_CONTROL1 0x00
-#define REG_CONTROL1_CAP_SEL (1 << 7)
-#define REG_CONTROL1_STOP    (1 << 5)
+#define REG_CONTROL1_CAP_SEL BIT(7)
+#define REG_CONTROL1_STOP    BIT(5)
 
 #define REG_CONTROL3 0x02
-#define REG_CONTROL3_PM_BLD (1 << 7) /* battery low detection disabled */
-#define REG_CONTROL3_PM_VDD (1 << 6) /* switch-over disabled */
-#define REG_CONTROL3_PM_DSM (1 << 5) /* direct switching mode */
+#define REG_CONTROL3_PM_BLD BIT(7) /* battery low detection disabled */
+#define REG_CONTROL3_PM_VDD BIT(6) /* switch-over disabled */
+#define REG_CONTROL3_PM_DSM BIT(5) /* direct switching mode */
 #define REG_CONTROL3_PM_MASK 0xe0
-#define REG_CONTROL3_BLF (1 << 2) /* battery low bit, read-only */
+#define REG_CONTROL3_BLF BIT(2) /* battery low bit, read-only */
 
 #define REG_SECONDS  0x03
-#define REG_SECONDS_OS (1 << 7)
+#define REG_SECONDS_OS BIT(7)
 
 #define REG_MINUTES  0x04
 #define REG_HOURS    0x05
@@ -226,17 +226,6 @@ static int pcf8523_rtc_set_time(struct device *dev, struct rtc_time *tm)
        u8 regs[8];
        int err;
 
-       /*
-        * The hardware can only store values between 0 and 99 in it's YEAR
-        * register (with 99 overflowing to 0 on increment).
-        * After 2100-02-28 we could start interpreting the year to be in the
-        * interval [2100, 2199], but there is no path to switch in a smooth way
-        * because the chip handles YEAR=0x00 (and the out-of-spec
-        * YEAR=0xa0) as a leap year, but 2100 isn't.
-        */
-       if (tm->tm_year < 100 || tm->tm_year >= 200)
-               return -EINVAL;
-
        err = pcf8523_stop_rtc(client);
        if (err < 0)
                return err;
@@ -356,12 +345,15 @@ static int pcf8523_probe(struct i2c_client *client,
        if (err < 0)
                return err;
 
-       rtc = devm_rtc_device_register(&client->dev, DRIVER_NAME,
-                                      &pcf8523_rtc_ops, THIS_MODULE);
+       rtc = devm_rtc_allocate_device(&client->dev);
        if (IS_ERR(rtc))
                return PTR_ERR(rtc);
 
-       return 0;
+       rtc->ops = &pcf8523_rtc_ops;
+       rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
+       rtc->range_max = RTC_TIMESTAMP_END_2099;
+
+       return devm_rtc_register_device(rtc);
 }
 
 static const struct i2c_device_id pcf8523_id[] = {
index 3450d61..a574c8d 100644 (file)
@@ -418,11 +418,11 @@ static int pcf85363_probe(struct i2c_client *client,
                        pcf85363->rtc->ops = &rtc_ops_alarm;
        }
 
-       ret = rtc_register_device(pcf85363->rtc);
+       ret = devm_rtc_register_device(pcf85363->rtc);
 
        for (i = 0; i < config->num_nvram; i++) {
                nvmem_cfg[i].priv = pcf85363;
-               rtc_nvmem_register(pcf85363->rtc, &nvmem_cfg[i]);
+               devm_rtc_nvmem_register(pcf85363->rtc, &nvmem_cfg[i]);
        }
 
        return ret;
index 2dc30ea..de3e6c3 100644 (file)
@@ -582,7 +582,7 @@ static int pcf8563_probe(struct i2c_client *client,
                }
        }
 
-       err = rtc_register_device(pcf8563->rtc);
+       err = devm_rtc_register_device(pcf8563->rtc);
        if (err)
                return err;
 
index 2b69467..7fb9145 100644 (file)
@@ -338,7 +338,7 @@ static int pic32_rtc_probe(struct platform_device *pdev)
        pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        pdata->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
-       ret = rtc_register_device(pdata->rtc);
+       ret = devm_rtc_register_device(pdata->rtc);
        if (ret)
                goto err_nortc;
 
index ebe03eb..5a88051 100644 (file)
@@ -121,7 +121,7 @@ static int pl030_probe(struct amba_device *dev, const struct amba_id *id)
        if (ret)
                goto err_irq;
 
-       ret = rtc_register_device(rtc->rtc);
+       ret = devm_rtc_register_device(rtc->rtc);
        if (ret)
                goto err_reg;
 
index c6b8927..224bbf0 100644 (file)
@@ -361,14 +361,16 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id)
 
        device_init_wakeup(&adev->dev, true);
        ldata->rtc = devm_rtc_allocate_device(&adev->dev);
-       if (IS_ERR(ldata->rtc))
-               return PTR_ERR(ldata->rtc);
+       if (IS_ERR(ldata->rtc)) {
+               ret = PTR_ERR(ldata->rtc);
+               goto out;
+       }
 
        ldata->rtc->ops = ops;
        ldata->rtc->range_min = vendor->range_min;
        ldata->rtc->range_max = vendor->range_max;
 
-       ret = rtc_register_device(ldata->rtc);
+       ret = devm_rtc_register_device(ldata->rtc);
        if (ret)
                goto out;
 
index b45ee2c..0d9dd6f 100644 (file)
@@ -508,7 +508,7 @@ static int pm8xxx_rtc_probe(struct platform_device *pdev)
                return rc;
        }
 
-       return rtc_register_device(rtc_dd->rtc);
+       return devm_rtc_register_device(rtc_dd->rtc);
 }
 
 #ifdef CONFIG_PM_SLEEP
index f0336d6..6b09873 100644 (file)
@@ -56,7 +56,7 @@ static int __init ps3_rtc_probe(struct platform_device *dev)
 
        platform_set_drvdata(dev, rtc);
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static struct platform_driver ps3_rtc_driver = {
index 7ceb968..60a3c3d 100644 (file)
@@ -127,7 +127,7 @@ static int r9701_probe(struct spi_device *spi)
        rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        rtc->range_max = RTC_TIMESTAMP_END_2099;
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static struct spi_driver r9701_driver = {
index dd1a209..e73102a 100644 (file)
@@ -426,7 +426,7 @@ static int rc5t619_rtc_probe(struct platform_device *pdev)
                dev_warn(&pdev->dev, "rc5t619 interrupt is disabled\n");
        }
 
-       return rtc_register_device(rtc->rtc);
+       return devm_rtc_register_device(rtc->rtc);
 }
 
 static struct platform_driver rc5t619_rtc_driver = {
index c0334c6..e920da8 100644 (file)
@@ -447,7 +447,7 @@ static int rk808_rtc_probe(struct platform_device *pdev)
                return ret;
        }
 
-       return rtc_register_device(rk808_rtc->rtc);
+       return devm_rtc_register_device(rk808_rtc->rtc);
 }
 
 static struct platform_driver rk808_rtc_driver = {
index 8776ead..44afa6d 100644 (file)
@@ -251,16 +251,15 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev)
                return PTR_ERR(rtc);
 
        rtc->ops = &rp5c01_rtc_ops;
-       rtc->nvram_old_abi = true;
 
        priv->rtc = rtc;
 
        nvmem_cfg.priv = priv;
-       error = rtc_nvmem_register(rtc, &nvmem_cfg);
+       error = devm_rtc_nvmem_register(rtc, &nvmem_cfg);
        if (error)
                return error;
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static struct platform_driver rp5c01_rtc_driver = {
index 47c1367..fec633f 100644 (file)
@@ -197,7 +197,7 @@ static int rs5c348_probe(struct spi_device *spi)
 
        rtc->ops = &rs5c348_rtc_ops;
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static struct spi_driver rs5c348_driver = {
index fa226f0..979407a 100644 (file)
@@ -886,14 +886,14 @@ static int rv3028_probe(struct i2c_client *client)
        rv3028->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        rv3028->rtc->range_max = RTC_TIMESTAMP_END_2099;
        rv3028->rtc->ops = &rv3028_rtc_ops;
-       ret = rtc_register_device(rv3028->rtc);
+       ret = devm_rtc_register_device(rv3028->rtc);
        if (ret)
                return ret;
 
        nvmem_cfg.priv = rv3028->regmap;
-       rtc_nvmem_register(rv3028->rtc, &nvmem_cfg);
+       devm_rtc_nvmem_register(rv3028->rtc, &nvmem_cfg);
        eeprom_cfg.priv = rv3028;
-       rtc_nvmem_register(rv3028->rtc, &eeprom_cfg);
+       devm_rtc_nvmem_register(rv3028->rtc, &eeprom_cfg);
 
        rv3028->rtc->max_user_freq = 1;
 
index 6271823..dc1bda6 100644 (file)
@@ -750,12 +750,12 @@ static int rv3029_probe(struct device *dev, struct regmap *regmap, int irq,
        rv3029->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        rv3029->rtc->range_max = RTC_TIMESTAMP_END_2079;
 
-       rc = rtc_register_device(rv3029->rtc);
+       rc = devm_rtc_register_device(rv3029->rtc);
        if (rc)
                return rc;
 
        nvmem_cfg.priv = rv3029->regmap;
-       rtc_nvmem_register(rv3029->rtc, &nvmem_cfg);
+       devm_rtc_nvmem_register(rv3029->rtc, &nvmem_cfg);
 
        return 0;
 }
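rtc_nvmem_register() is converted the same way: devm_rtc_nvmem_register() ties the nvmem provider to the device, and the now-redundant nvram_old_abi assignments disappear alongside it (the legacy nvram ABI flag appears to have been dropped from the RTC core in the same series). A hedged sketch of the registration shape, with illustrative config values and hypothetical callback names:

	struct nvmem_config nvmem_cfg = {
		.name = "foo_nvram",		/* illustrative */
		.word_size = 1,
		.stride = 1,
		.size = 64,
		.reg_read = foo_nvram_read,	/* hypothetical callbacks */
		.reg_write = foo_nvram_write,
	};

	nvmem_cfg.priv = foo;	/* handed back to the callbacks */
	devm_rtc_nvmem_register(rtc, &nvmem_cfg);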
index 3e67f71..c9bcea7 100644 (file)
@@ -885,14 +885,14 @@ static int rv3032_probe(struct i2c_client *client)
        rv3032->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        rv3032->rtc->range_max = RTC_TIMESTAMP_END_2099;
        rv3032->rtc->ops = &rv3032_rtc_ops;
-       ret = rtc_register_device(rv3032->rtc);
+       ret = devm_rtc_register_device(rv3032->rtc);
        if (ret)
                return ret;
 
-       nvmem_cfg.priv = rv3032;
-       rtc_nvmem_register(rv3032->rtc, &nvmem_cfg);
+       nvmem_cfg.priv = rv3032->regmap;
+       devm_rtc_nvmem_register(rv3032->rtc, &nvmem_cfg);
        eeprom_cfg.priv = rv3032;
-       rtc_nvmem_register(rv3032->rtc, &eeprom_cfg);
+       devm_rtc_nvmem_register(rv3032->rtc, &eeprom_cfg);
 
        rv3032->rtc->max_user_freq = 1;
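Note the bug fix folded into the rv3032 hunk above: nvmem_cfg.priv now points at the regmap rather than at the driver struct. The nvram callbacks dereference priv as a regmap, so the old assignment handed them the wrong pointer. A sketch of the callback shape this implies (register name hypothetical):

	static int rv3032_nvram_read(void *priv, unsigned int offset,
				     void *val, size_t bytes)
	{
		/* only valid if priv really is the regmap */
		return regmap_bulk_read(priv,
					RV3032_RAM_BASE /* hypothetical */ + offset,
					val, bytes);
	}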
 
index c6d8e34..d4ea6db 100644 (file)
@@ -585,14 +585,13 @@ static int rv8803_probe(struct i2c_client *client,
        }
 
        rv8803->rtc->ops = &rv8803_rtc_ops;
-       rv8803->rtc->nvram_old_abi = true;
        rv8803->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        rv8803->rtc->range_max = RTC_TIMESTAMP_END_2099;
-       err = rtc_register_device(rv8803->rtc);
+       err = devm_rtc_register_device(rv8803->rtc);
        if (err)
                return err;
 
-       rtc_nvmem_register(rv8803->rtc, &nvmem_cfg);
+       devm_rtc_nvmem_register(rv8803->rtc, &nvmem_cfg);
 
        rv8803->rtc->max_user_freq = 1;
 
index 3a9eb70..a7b671a 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/spi/spi.h>
+#include <linux/i2c.h>
 
 /* RX-6110 Register definitions */
 #define RX6110_REG_SEC         0x10
@@ -310,6 +311,27 @@ static const struct rtc_class_ops rx6110_rtc_ops = {
        .set_time = rx6110_set_time,
 };
 
+static int rx6110_probe(struct rx6110_data *rx6110, struct device *dev)
+{
+       int err;
+
+       rx6110->rtc = devm_rtc_device_register(dev,
+                                              RX6110_DRIVER_NAME,
+                                              &rx6110_rtc_ops, THIS_MODULE);
+
+       if (IS_ERR(rx6110->rtc))
+               return PTR_ERR(rx6110->rtc);
+
+       err = rx6110_init(rx6110);
+       if (err)
+               return err;
+
+       rx6110->rtc->max_user_freq = 1;
+
+       return 0;
+}
+
+#ifdef CONFIG_SPI_MASTER
 static struct regmap_config regmap_spi_config = {
        .reg_bits = 8,
        .val_bits = 8,
@@ -318,13 +340,12 @@ static struct regmap_config regmap_spi_config = {
 };
 
 /**
- * rx6110_probe - initialize rtc driver
+ * rx6110_spi_probe - initialize rtc driver
  * @spi: pointer to spi device
  */
-static int rx6110_probe(struct spi_device *spi)
+static int rx6110_spi_probe(struct spi_device *spi)
 {
        struct rx6110_data *rx6110;
-       int err;
 
        if ((spi->bits_per_word && spi->bits_per_word != 8) ||
            (spi->max_speed_hz > 2000000) ||
@@ -346,27 +367,14 @@ static int rx6110_probe(struct spi_device *spi)
 
        spi_set_drvdata(spi, rx6110);
 
-       rx6110->rtc = devm_rtc_device_register(&spi->dev,
-                                              RX6110_DRIVER_NAME,
-                                              &rx6110_rtc_ops, THIS_MODULE);
-
-       if (IS_ERR(rx6110->rtc))
-               return PTR_ERR(rx6110->rtc);
-
-       err = rx6110_init(rx6110);
-       if (err)
-               return err;
-
-       rx6110->rtc->max_user_freq = 1;
-
-       return 0;
+       return rx6110_probe(rx6110, &spi->dev);
 }
 
-static const struct spi_device_id rx6110_id[] = {
+static const struct spi_device_id rx6110_spi_id[] = {
        { "rx6110", 0 },
        { }
 };
-MODULE_DEVICE_TABLE(spi, rx6110_id);
+MODULE_DEVICE_TABLE(spi, rx6110_spi_id);
 
 static const struct of_device_id rx6110_spi_of_match[] = {
        { .compatible = "epson,rx6110" },
@@ -374,16 +382,127 @@ static const struct of_device_id rx6110_spi_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, rx6110_spi_of_match);
 
-static struct spi_driver rx6110_driver = {
+static struct spi_driver rx6110_spi_driver = {
        .driver = {
                .name = RX6110_DRIVER_NAME,
                .of_match_table = of_match_ptr(rx6110_spi_of_match),
        },
-       .probe          = rx6110_probe,
-       .id_table       = rx6110_id,
+       .probe          = rx6110_spi_probe,
+       .id_table       = rx6110_spi_id,
+};
+
+static int rx6110_spi_register(void)
+{
+       return spi_register_driver(&rx6110_spi_driver);
+}
+
+static void rx6110_spi_unregister(void)
+{
+       spi_unregister_driver(&rx6110_spi_driver);
+}
+#else
+static int rx6110_spi_register(void)
+{
+       return 0;
+}
+
+static void rx6110_spi_unregister(void)
+{
+}
+#endif /* CONFIG_SPI_MASTER */
+
+#ifdef CONFIG_I2C
+static struct regmap_config regmap_i2c_config = {
+       .reg_bits = 8,
+       .val_bits = 8,
+       .max_register = RX6110_REG_IRQ,
+       .read_flag_mask = 0x80,
 };
 
-module_spi_driver(rx6110_driver);
+static int rx6110_i2c_probe(struct i2c_client *client,
+                           const struct i2c_device_id *id)
+{
+       struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+       struct rx6110_data *rx6110;
+
+       if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA
+                               | I2C_FUNC_SMBUS_I2C_BLOCK)) {
+               dev_err(&adapter->dev,
+                       "doesn't support required functionality\n");
+               return -EIO;
+       }
+
+       rx6110 = devm_kzalloc(&client->dev, sizeof(*rx6110), GFP_KERNEL);
+       if (!rx6110)
+               return -ENOMEM;
+
+       rx6110->regmap = devm_regmap_init_i2c(client, &regmap_i2c_config);
+       if (IS_ERR(rx6110->regmap)) {
+               dev_err(&client->dev, "regmap init failed for rtc rx6110\n");
+               return PTR_ERR(rx6110->regmap);
+       }
+
+       i2c_set_clientdata(client, rx6110);
+
+       return rx6110_probe(rx6110, &client->dev);
+}
+
+static const struct i2c_device_id rx6110_i2c_id[] = {
+       { "rx6110", 0 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, rx6110_i2c_id);
+
+static struct i2c_driver rx6110_i2c_driver = {
+       .driver = {
+               .name = RX6110_DRIVER_NAME,
+       },
+       .probe          = rx6110_i2c_probe,
+       .id_table       = rx6110_i2c_id,
+};
+
+static int rx6110_i2c_register(void)
+{
+       return i2c_add_driver(&rx6110_i2c_driver);
+}
+
+static void rx6110_i2c_unregister(void)
+{
+       i2c_del_driver(&rx6110_i2c_driver);
+}
+#else
+static int rx6110_i2c_register(void)
+{
+       return 0;
+}
+
+static void rx6110_i2c_unregister(void)
+{
+}
+#endif /* CONFIG_I2C */
+
+static int __init rx6110_module_init(void)
+{
+       int ret;
+
+       ret = rx6110_spi_register();
+       if (ret)
+               return ret;
+
+       ret = rx6110_i2c_register();
+       if (ret)
+               rx6110_spi_unregister();
+
+       return ret;
+}
+module_init(rx6110_module_init);
+
+static void __exit rx6110_module_exit(void)
+{
+       rx6110_spi_unregister();
+       rx6110_i2c_unregister();
+}
+module_exit(rx6110_module_exit);
 
 MODULE_AUTHOR("Val Krutov <val.krutov@erd.epson.com>");
 MODULE_DESCRIPTION("RX-6110 SA RTC driver");
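The rx6110 rework follows the usual pattern for chips reachable over both SPI and I2C: the bus-agnostic probe work moves into rx6110_probe(), each bus gets a thin wrapper that sets up its own regmap, and module_spi_driver() gives way to open-coded module_init()/module_exit() because the convenience macro can register only one driver. The #ifdef stubs keep the module buildable with either bus disabled, and the init path unwinds the SPI registration if the I2C registration fails.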
index dca41a2..8340ab4 100644 (file)
@@ -419,7 +419,7 @@ static int rx8010_probe(struct i2c_client *client)
        rx8010->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        rx8010->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
-       return rtc_register_device(rx8010->rtc);
+       return devm_rtc_register_device(rx8010->rtc);
 }
 
 static struct i2c_driver rx8010_driver = {
index 490f70f..de10913 100644 (file)
@@ -298,11 +298,11 @@ static int rx8581_probe(struct i2c_client *client,
        rx8581->rtc->start_secs = 0;
        rx8581->rtc->set_start_time = true;
 
-       ret = rtc_register_device(rx8581->rtc);
+       ret = devm_rtc_register_device(rx8581->rtc);
 
        for (i = 0; i < config->num_nvram; i++) {
                nvmem_cfg[i].priv = rx8581;
-               rtc_nvmem_register(rx8581->rtc, &nvmem_cfg[i]);
+               devm_rtc_nvmem_register(rx8581->rtc, &nvmem_cfg[i]);
        }
 
        return ret;
index 03672a2..ea15d03 100644 (file)
@@ -497,7 +497,7 @@ static int s35390a_probe(struct i2c_client *client,
        if (status1 & S35390A_FLAG_INT2)
                rtc_update_irq(s35390a->rtc, 1, RTC_AF);
 
-       return rtc_register_device(s35390a->rtc);
+       return devm_rtc_register_device(s35390a->rtc);
 }
 
 static struct i2c_driver s35390a_driver = {
index 24a4190..fab326b 100644 (file)
@@ -42,26 +42,15 @@ struct s3c_rtc {
        const struct s3c_rtc_data *data;
 
        int irq_alarm;
-       int irq_tick;
-
-       spinlock_t pie_lock;
        spinlock_t alarm_lock;
 
-       int ticnt_save;
-       int ticnt_en_save;
        bool wake_en;
 };
 
 struct s3c_rtc_data {
-       int max_user_freq;
        bool needs_src_clk;
 
        void (*irq_handler) (struct s3c_rtc *info, int mask);
-       void (*set_freq) (struct s3c_rtc *info, int freq);
-       void (*enable_tick) (struct s3c_rtc *info, struct seq_file *seq);
-       void (*select_tick_clk) (struct s3c_rtc *info);
-       void (*save_tick_cnt) (struct s3c_rtc *info);
-       void (*restore_tick_cnt) (struct s3c_rtc *info);
        void (*enable) (struct s3c_rtc *info);
        void (*disable) (struct s3c_rtc *info);
 };
@@ -91,17 +80,7 @@ static void s3c_rtc_disable_clk(struct s3c_rtc *info)
        clk_disable(info->rtc_clk);
 }
 
-/* IRQ Handlers */
-static irqreturn_t s3c_rtc_tickirq(int irq, void *id)
-{
-       struct s3c_rtc *info = (struct s3c_rtc *)id;
-
-       if (info->data->irq_handler)
-               info->data->irq_handler(info, S3C2410_INTP_TIC);
-
-       return IRQ_HANDLED;
-}
-
+/* IRQ Handler */
 static irqreturn_t s3c_rtc_alarmirq(int irq, void *id)
 {
        struct s3c_rtc *info = (struct s3c_rtc *)id;
@@ -148,28 +127,6 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
        return ret;
 }
 
-/* Set RTC frequency */
-static int s3c_rtc_setfreq(struct s3c_rtc *info, int freq)
-{
-       int ret;
-
-       if (!is_power_of_2(freq))
-               return -EINVAL;
-
-       ret = s3c_rtc_enable_clk(info);
-       if (ret)
-               return ret;
-       spin_lock_irq(&info->pie_lock);
-
-       if (info->data->set_freq)
-               info->data->set_freq(info, freq);
-
-       spin_unlock_irq(&info->pie_lock);
-       s3c_rtc_disable_clk(info);
-
-       return 0;
-}
-
 /* Time read/write */
 static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 {
@@ -348,29 +305,11 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
        return 0;
 }
 
-static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
-{
-       struct s3c_rtc *info = dev_get_drvdata(dev);
-       int ret;
-
-       ret = s3c_rtc_enable_clk(info);
-       if (ret)
-               return ret;
-
-       if (info->data->enable_tick)
-               info->data->enable_tick(info, seq);
-
-       s3c_rtc_disable_clk(info);
-
-       return 0;
-}
-
 static const struct rtc_class_ops s3c_rtcops = {
        .read_time      = s3c_rtc_gettime,
        .set_time       = s3c_rtc_settime,
        .read_alarm     = s3c_rtc_getalarm,
        .set_alarm      = s3c_rtc_setalarm,
-       .proc           = s3c_rtc_proc,
        .alarm_irq_enable = s3c_rtc_setaie,
 };
 
@@ -450,18 +389,12 @@ static int s3c_rtc_probe(struct platform_device *pdev)
        if (!info)
                return -ENOMEM;
 
-       /* find the IRQs */
-       info->irq_tick = platform_get_irq(pdev, 1);
-       if (info->irq_tick < 0)
-               return info->irq_tick;
-
        info->dev = &pdev->dev;
        info->data = of_device_get_match_data(&pdev->dev);
        if (!info->data) {
                dev_err(&pdev->dev, "failed getting s3c_rtc_data\n");
                return -EINVAL;
        }
-       spin_lock_init(&info->pie_lock);
        spin_lock_init(&info->alarm_lock);
 
        platform_set_drvdata(pdev, info);
@@ -470,8 +403,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
        if (info->irq_alarm < 0)
                return info->irq_alarm;
 
-       dev_dbg(&pdev->dev, "s3c2410_rtc: tick irq %d, alarm irq %d\n",
-               info->irq_tick, info->irq_alarm);
+       dev_dbg(&pdev->dev, "s3c2410_rtc: alarm irq %d\n", info->irq_alarm);
 
        /* get the memory region */
        info->base = devm_platform_ioremap_resource(pdev, 0);
@@ -503,6 +435,10 @@ static int s3c_rtc_probe(struct platform_device *pdev)
                        goto err_src_clk;
        }
 
+       /* disable RTC enable bits potentially set by the bootloader */
+       if (info->data->disable)
+               info->data->disable(info);
+
        /* check to see if everything is setup correctly */
        if (info->data->enable)
                info->data->enable(info);
@@ -542,18 +478,6 @@ static int s3c_rtc_probe(struct platform_device *pdev)
                goto err_nortc;
        }
 
-       ret = devm_request_irq(&pdev->dev, info->irq_tick, s3c_rtc_tickirq,
-                              0, "s3c2410-rtc tick", info);
-       if (ret) {
-               dev_err(&pdev->dev, "IRQ%d error %d\n", info->irq_tick, ret);
-               goto err_nortc;
-       }
-
-       if (info->data->select_tick_clk)
-               info->data->select_tick_clk(info);
-
-       s3c_rtc_setfreq(info, 1);
-
        s3c_rtc_disable_clk(info);
 
        return 0;
@@ -581,10 +505,6 @@ static int s3c_rtc_suspend(struct device *dev)
        if (ret)
                return ret;
 
-       /* save TICNT for anyone using periodic interrupts */
-       if (info->data->save_tick_cnt)
-               info->data->save_tick_cnt(info);
-
        if (info->data->disable)
                info->data->disable(info);
 
@@ -605,9 +525,6 @@ static int s3c_rtc_resume(struct device *dev)
        if (info->data->enable)
                info->data->enable(info);
 
-       if (info->data->restore_tick_cnt)
-               info->data->restore_tick_cnt(info);
-
        s3c_rtc_disable_clk(info);
 
        if (device_may_wakeup(dev) && info->wake_en) {
@@ -631,162 +548,27 @@ static void s3c6410_rtc_irq(struct s3c_rtc *info, int mask)
        writeb(mask, info->base + S3C2410_INTP);
 }
 
-static void s3c2410_rtc_setfreq(struct s3c_rtc *info, int freq)
-{
-       unsigned int tmp = 0;
-       int val;
-
-       tmp = readb(info->base + S3C2410_TICNT);
-       tmp &= S3C2410_TICNT_ENABLE;
-
-       val = (info->rtc->max_user_freq / freq) - 1;
-       tmp |= val;
-
-       writel(tmp, info->base + S3C2410_TICNT);
-}
-
-static void s3c2416_rtc_setfreq(struct s3c_rtc *info, int freq)
-{
-       unsigned int tmp = 0;
-       int val;
-
-       tmp = readb(info->base + S3C2410_TICNT);
-       tmp &= S3C2410_TICNT_ENABLE;
-
-       val = (info->rtc->max_user_freq / freq) - 1;
-
-       tmp |= S3C2443_TICNT_PART(val);
-       writel(S3C2443_TICNT1_PART(val), info->base + S3C2443_TICNT1);
-
-       writel(S3C2416_TICNT2_PART(val), info->base + S3C2416_TICNT2);
-
-       writel(tmp, info->base + S3C2410_TICNT);
-}
-
-static void s3c2443_rtc_setfreq(struct s3c_rtc *info, int freq)
-{
-       unsigned int tmp = 0;
-       int val;
-
-       tmp = readb(info->base + S3C2410_TICNT);
-       tmp &= S3C2410_TICNT_ENABLE;
-
-       val = (info->rtc->max_user_freq / freq) - 1;
-
-       tmp |= S3C2443_TICNT_PART(val);
-       writel(S3C2443_TICNT1_PART(val), info->base + S3C2443_TICNT1);
-
-       writel(tmp, info->base + S3C2410_TICNT);
-}
-
-static void s3c6410_rtc_setfreq(struct s3c_rtc *info, int freq)
-{
-       int val;
-
-       val = (info->rtc->max_user_freq / freq) - 1;
-       writel(val, info->base + S3C2410_TICNT);
-}
-
-static void s3c24xx_rtc_enable_tick(struct s3c_rtc *info, struct seq_file *seq)
-{
-       unsigned int ticnt;
-
-       ticnt = readb(info->base + S3C2410_TICNT);
-       ticnt &= S3C2410_TICNT_ENABLE;
-
-       seq_printf(seq, "periodic_IRQ\t: %s\n", ticnt  ? "yes" : "no");
-}
-
-static void s3c2416_rtc_select_tick_clk(struct s3c_rtc *info)
-{
-       unsigned int con;
-
-       con = readw(info->base + S3C2410_RTCCON);
-       con |= S3C2443_RTCCON_TICSEL;
-       writew(con, info->base + S3C2410_RTCCON);
-}
-
-static void s3c6410_rtc_enable_tick(struct s3c_rtc *info, struct seq_file *seq)
-{
-       unsigned int ticnt;
-
-       ticnt = readw(info->base + S3C2410_RTCCON);
-       ticnt &= S3C64XX_RTCCON_TICEN;
-
-       seq_printf(seq, "periodic_IRQ\t: %s\n", ticnt  ? "yes" : "no");
-}
-
-static void s3c24xx_rtc_save_tick_cnt(struct s3c_rtc *info)
-{
-       info->ticnt_save = readb(info->base + S3C2410_TICNT);
-}
-
-static void s3c24xx_rtc_restore_tick_cnt(struct s3c_rtc *info)
-{
-       writeb(info->ticnt_save, info->base + S3C2410_TICNT);
-}
-
-static void s3c6410_rtc_save_tick_cnt(struct s3c_rtc *info)
-{
-       info->ticnt_en_save = readw(info->base + S3C2410_RTCCON);
-       info->ticnt_en_save &= S3C64XX_RTCCON_TICEN;
-       info->ticnt_save = readl(info->base + S3C2410_TICNT);
-}
-
-static void s3c6410_rtc_restore_tick_cnt(struct s3c_rtc *info)
-{
-       unsigned int con;
-
-       writel(info->ticnt_save, info->base + S3C2410_TICNT);
-       if (info->ticnt_en_save) {
-               con = readw(info->base + S3C2410_RTCCON);
-               writew(con | info->ticnt_en_save, info->base + S3C2410_RTCCON);
-       }
-}
-
 static struct s3c_rtc_data const s3c2410_rtc_data = {
-       .max_user_freq          = 128,
        .irq_handler            = s3c24xx_rtc_irq,
-       .set_freq               = s3c2410_rtc_setfreq,
-       .enable_tick            = s3c24xx_rtc_enable_tick,
-       .save_tick_cnt          = s3c24xx_rtc_save_tick_cnt,
-       .restore_tick_cnt       = s3c24xx_rtc_restore_tick_cnt,
        .enable                 = s3c24xx_rtc_enable,
        .disable                = s3c24xx_rtc_disable,
 };
 
 static struct s3c_rtc_data const s3c2416_rtc_data = {
-       .max_user_freq          = 32768,
        .irq_handler            = s3c24xx_rtc_irq,
-       .set_freq               = s3c2416_rtc_setfreq,
-       .enable_tick            = s3c24xx_rtc_enable_tick,
-       .select_tick_clk        = s3c2416_rtc_select_tick_clk,
-       .save_tick_cnt          = s3c24xx_rtc_save_tick_cnt,
-       .restore_tick_cnt       = s3c24xx_rtc_restore_tick_cnt,
        .enable                 = s3c24xx_rtc_enable,
        .disable                = s3c24xx_rtc_disable,
 };
 
 static struct s3c_rtc_data const s3c2443_rtc_data = {
-       .max_user_freq          = 32768,
        .irq_handler            = s3c24xx_rtc_irq,
-       .set_freq               = s3c2443_rtc_setfreq,
-       .enable_tick            = s3c24xx_rtc_enable_tick,
-       .select_tick_clk        = s3c2416_rtc_select_tick_clk,
-       .save_tick_cnt          = s3c24xx_rtc_save_tick_cnt,
-       .restore_tick_cnt       = s3c24xx_rtc_restore_tick_cnt,
        .enable                 = s3c24xx_rtc_enable,
        .disable                = s3c24xx_rtc_disable,
 };
 
 static struct s3c_rtc_data const s3c6410_rtc_data = {
-       .max_user_freq          = 32768,
        .needs_src_clk          = true,
        .irq_handler            = s3c6410_rtc_irq,
-       .set_freq               = s3c6410_rtc_setfreq,
-       .enable_tick            = s3c6410_rtc_enable_tick,
-       .save_tick_cnt          = s3c6410_rtc_save_tick_cnt,
-       .restore_tick_cnt       = s3c6410_rtc_restore_tick_cnt,
        .enable                 = s3c24xx_rtc_enable,
        .disable                = s3c6410_rtc_disable,
 };
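The s3c hunks above strip the driver's periodic-tick machinery wholesale: the TICNT programming, the dedicated tick IRQ, the /proc hook, and the suspend/resume save/restore all go. The RTC core has emulated periodic interrupts in software (via hrtimer) for a long time, so this per-driver tick plumbing was dead weight; dropping it also removes the second platform IRQ the driver used to demand at probe time.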
index 9ccc97c..1250887 100644 (file)
@@ -205,7 +205,7 @@ int sa1100_rtc_init(struct platform_device *pdev, struct sa1100_rtc *info)
        info->rtc->max_user_freq = RTC_FREQ;
        info->rtc->range_max = U32_MAX;
 
-       ret = rtc_register_device(info->rtc);
+       ret = devm_rtc_register_device(info->rtc);
        if (ret) {
                clk_disable_unprepare(info->clk);
                return ret;
index 36810dd..187aa95 100644 (file)
@@ -299,33 +299,6 @@ static int sprd_rtc_set_secs(struct sprd_rtc *rtc, enum sprd_rtc_reg_types type,
                            sts_mask);
 }
 
-static int sprd_rtc_read_aux_alarm(struct device *dev, struct rtc_wkalrm *alrm)
-{
-       struct sprd_rtc *rtc = dev_get_drvdata(dev);
-       time64_t secs;
-       u32 val;
-       int ret;
-
-       ret = sprd_rtc_get_secs(rtc, SPRD_RTC_AUX_ALARM, &secs);
-       if (ret)
-               return ret;
-
-       rtc_time64_to_tm(secs, &alrm->time);
-
-       ret = regmap_read(rtc->regmap, rtc->base + SPRD_RTC_INT_EN, &val);
-       if (ret)
-               return ret;
-
-       alrm->enabled = !!(val & SPRD_RTC_AUXALM_EN);
-
-       ret = regmap_read(rtc->regmap, rtc->base + SPRD_RTC_INT_RAW_STS, &val);
-       if (ret)
-               return ret;
-
-       alrm->pending = !!(val & SPRD_RTC_AUXALM_EN);
-       return 0;
-}
-
 static int sprd_rtc_set_aux_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
        struct sprd_rtc *rtc = dev_get_drvdata(dev);
@@ -415,16 +388,9 @@ static int sprd_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        u32 val;
 
        /*
-        * Before RTC device is registered, it will check to see if there is an
-        * alarm already set in RTC hardware, and we always read the normal
-        * alarm at this time.
-        *
-        * Or if aie_timer is enabled, we should get the normal alarm time.
-        * Otherwise we should get auxiliary alarm time.
+        * The RTC core checks to see if there is an alarm already set in RTC
+        * hardware, and we always read the normal alarm at this time.
         */
-       if (rtc->rtc && rtc->rtc->registered && rtc->rtc->aie_timer.enabled == 0)
-               return sprd_rtc_read_aux_alarm(dev, alrm);
-
        ret = sprd_rtc_get_secs(rtc, SPRD_RTC_ALARM, &secs);
        if (ret)
                return ret;
@@ -563,7 +529,7 @@ static int sprd_rtc_check_power_down(struct sprd_rtc *rtc)
         * means the RTC has been powered down, so the RTC time values are
         * invalid.
         */
-       rtc->valid = val == SPRD_RTC_POWER_RESET_VALUE ? false : true;
+       rtc->valid = val != SPRD_RTC_POWER_RESET_VALUE;
        return 0;
 }
 
@@ -652,7 +618,7 @@ static int sprd_rtc_probe(struct platform_device *pdev)
        rtc->rtc->ops = &sprd_rtc_ops;
        rtc->rtc->range_min = 0;
        rtc->rtc->range_max = 5662310399LL;
-       ret = rtc_register_device(rtc->rtc);
+       ret = devm_rtc_register_device(rtc->rtc);
        if (ret) {
                device_init_wakeup(&pdev->dev, 0);
                return ret;
index a7aa943..f6bee69 100644 (file)
@@ -192,7 +192,7 @@ static int sd3078_probe(struct i2c_client *client,
        sd3078->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        sd3078->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
-       ret = rtc_register_device(sd3078->rtc);
+       ret = devm_rtc_register_device(sd3078->rtc);
        if (ret)
                return ret;
 
index 9167b48..cd146b5 100644 (file)
@@ -607,7 +607,7 @@ static int __init sh_rtc_probe(struct platform_device *pdev)
                rtc->rtc_dev->range_max = mktime64(2098, 12, 31, 23, 59, 59);
        }
 
-       ret = rtc_register_device(rtc->rtc_dev);
+       ret = devm_rtc_register_device(rtc->rtc_dev);
        if (ret)
                goto err_unmap;
 
index abf1943..03a6cca 100644 (file)
@@ -356,7 +356,7 @@ static int sirfsoc_rtc_probe(struct platform_device *pdev)
                return err;
        }
 
-       return rtc_register_device(rtcdrv->rtc);
+       return devm_rtc_register_device(rtcdrv->rtc);
 }
 
 #ifdef CONFIG_PM_SLEEP
index 0263d99..bd929b0 100644 (file)
@@ -151,17 +151,14 @@ static int snvs_rtc_read_time(struct device *dev, struct rtc_time *tm)
        unsigned long time;
        int ret;
 
-       if (data->clk) {
-               ret = clk_enable(data->clk);
-               if (ret)
-                       return ret;
-       }
+       ret = clk_enable(data->clk);
+       if (ret)
+               return ret;
 
        time = rtc_read_lp_counter(data);
        rtc_time64_to_tm(time, tm);
 
-       if (data->clk)
-               clk_disable(data->clk);
+       clk_disable(data->clk);
 
        return 0;
 }
@@ -172,11 +169,9 @@ static int snvs_rtc_set_time(struct device *dev, struct rtc_time *tm)
        unsigned long time = rtc_tm_to_time64(tm);
        int ret;
 
-       if (data->clk) {
-               ret = clk_enable(data->clk);
-               if (ret)
-                       return ret;
-       }
+       ret = clk_enable(data->clk);
+       if (ret)
+               return ret;
 
        /* Disable RTC first */
        ret = snvs_rtc_enable(data, false);
@@ -190,8 +185,7 @@ static int snvs_rtc_set_time(struct device *dev, struct rtc_time *tm)
        /* Enable RTC again */
        ret = snvs_rtc_enable(data, true);
 
-       if (data->clk)
-               clk_disable(data->clk);
+       clk_disable(data->clk);
 
        return ret;
 }
@@ -202,11 +196,9 @@ static int snvs_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        u32 lptar, lpsr;
        int ret;
 
-       if (data->clk) {
-               ret = clk_enable(data->clk);
-               if (ret)
-                       return ret;
-       }
+       ret = clk_enable(data->clk);
+       if (ret)
+               return ret;
 
        regmap_read(data->regmap, data->offset + SNVS_LPTAR, &lptar);
        rtc_time64_to_tm(lptar, &alrm->time);
@@ -214,8 +206,7 @@ static int snvs_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        regmap_read(data->regmap, data->offset + SNVS_LPSR, &lpsr);
        alrm->pending = (lpsr & SNVS_LPSR_LPTA) ? 1 : 0;
 
-       if (data->clk)
-               clk_disable(data->clk);
+       clk_disable(data->clk);
 
        return 0;
 }
@@ -225,11 +216,9 @@ static int snvs_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
        struct snvs_rtc_data *data = dev_get_drvdata(dev);
        int ret;
 
-       if (data->clk) {
-               ret = clk_enable(data->clk);
-               if (ret)
-                       return ret;
-       }
+       ret = clk_enable(data->clk);
+       if (ret)
+               return ret;
 
        regmap_update_bits(data->regmap, data->offset + SNVS_LPCR,
                           (SNVS_LPCR_LPTA_EN | SNVS_LPCR_LPWUI_EN),
@@ -237,8 +226,7 @@ static int snvs_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
 
        ret = rtc_write_sync_lp(data);
 
-       if (data->clk)
-               clk_disable(data->clk);
+       clk_disable(data->clk);
 
        return ret;
 }
@@ -249,11 +237,9 @@ static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        unsigned long time = rtc_tm_to_time64(&alrm->time);
        int ret;
 
-       if (data->clk) {
-               ret = clk_enable(data->clk);
-               if (ret)
-                       return ret;
-       }
+       ret = clk_enable(data->clk);
+       if (ret)
+               return ret;
 
        regmap_update_bits(data->regmap, data->offset + SNVS_LPCR, SNVS_LPCR_LPTA_EN, 0);
        ret = rtc_write_sync_lp(data);
@@ -264,8 +250,7 @@ static int snvs_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        /* Clear alarm interrupt status bit */
        regmap_write(data->regmap, data->offset + SNVS_LPSR, SNVS_LPSR_LPTA);
 
-       if (data->clk)
-               clk_disable(data->clk);
+       clk_disable(data->clk);
 
        return snvs_rtc_alarm_irq_enable(dev, alrm->enabled);
 }
@@ -285,8 +270,7 @@ static irqreturn_t snvs_rtc_irq_handler(int irq, void *dev_id)
        u32 lpsr;
        u32 events = 0;
 
-       if (data->clk)
-               clk_enable(data->clk);
+       clk_enable(data->clk);
 
        regmap_read(data->regmap, data->offset + SNVS_LPSR, &lpsr);
 
@@ -302,8 +286,7 @@ static irqreturn_t snvs_rtc_irq_handler(int irq, void *dev_id)
        /* clear interrupt status */
        regmap_write(data->regmap, data->offset + SNVS_LPSR, lpsr);
 
-       if (data->clk)
-               clk_disable(data->clk);
+       clk_disable(data->clk);
 
        return events ? IRQ_HANDLED : IRQ_NONE;
 }
@@ -316,8 +299,7 @@ static const struct regmap_config snvs_rtc_config = {
 
 static void snvs_rtc_action(void *data)
 {
-       if (data)
-               clk_disable_unprepare(data);
+       clk_disable_unprepare(data);
 }
 
 static int snvs_rtc_probe(struct platform_device *pdev)
@@ -405,15 +387,14 @@ static int snvs_rtc_probe(struct platform_device *pdev)
        data->rtc->ops = &snvs_rtc_ops;
        data->rtc->range_max = U32_MAX;
 
-       return rtc_register_device(data->rtc);
+       return devm_rtc_register_device(data->rtc);
 }
 
 static int __maybe_unused snvs_rtc_suspend_noirq(struct device *dev)
 {
        struct snvs_rtc_data *data = dev_get_drvdata(dev);
 
-       if (data->clk)
-               clk_disable(data->clk);
+       clk_disable(data->clk);
 
        return 0;
 }
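The snvs hunks lean on a guarantee of the common clock framework: consumer calls accept a NULL clk and treat it as a no-op, so the per-call if (data->clk) guards were redundant. Illustration:

	struct clk *clk = NULL;		/* optional clock absent */

	if (clk_enable(clk))		/* returns 0 for a NULL clk */
		return -EIO;
	clk_disable(clk);		/* no-op for a NULL clk */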
index 0c65448..bdb20f6 100644 (file)
@@ -250,7 +250,7 @@ static int st_rtc_probe(struct platform_device *pdev)
        rtc->rtc_dev->range_max = U64_MAX;
        do_div(rtc->rtc_dev->range_max, rtc->clkrate);
 
-       ret = rtc_register_device(rtc->rtc_dev);
+       ret = devm_rtc_register_device(rtc->rtc_dev);
        if (ret) {
                clk_disable_unprepare(rtc->clk);
                return ret;
index 37a2627..fbd1ed4 100644 (file)
@@ -48,7 +48,7 @@ static int __init starfire_rtc_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, rtc);
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static struct platform_driver starfire_rtc_driver = {
index 01a4504..7cb6be1 100644 (file)
@@ -311,14 +311,13 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(pdata->rtc);
 
        pdata->rtc->ops = &stk17ta8_rtc_ops;
-       pdata->rtc->nvram_old_abi = true;
 
        nvmem_cfg.priv = pdata;
-       ret = rtc_nvmem_register(pdata->rtc, &nvmem_cfg);
+       ret = devm_rtc_nvmem_register(pdata->rtc, &nvmem_cfg);
        if (ret)
                return ret;
 
-       return rtc_register_device(pdata->rtc);
+       return devm_rtc_register_device(pdata->rtc);
 }
 
 /* work with hotplug and coldplug */
index 0a969af..40c0f7e 100644 (file)
@@ -366,7 +366,7 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev)
        rtc_data->rtc->ops = &stmp3xxx_rtc_ops;
        rtc_data->rtc->range_max = U32_MAX;
 
-       err = rtc_register_device(rtc_data->rtc);
+       err = devm_rtc_register_device(rtc_data->rtc);
        if (err)
                return err;
 
index 036463d..a86e27d 100644 (file)
@@ -86,7 +86,7 @@ static int __init sun4v_rtc_probe(struct platform_device *pdev)
        rtc->range_max = U64_MAX;
        platform_set_drvdata(pdev, rtc);
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static struct platform_driver sun4v_rtc_driver = {
index e2b8b15..adec1b1 100644 (file)
@@ -272,7 +272,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
                                                                300000000);
        if (IS_ERR(rtc->int_osc)) {
                pr_crit("Couldn't register the internal oscillator\n");
-               return;
+               goto err;
        }
 
        parents[0] = clk_hw_get_name(rtc->int_osc);
@@ -290,7 +290,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
        rtc->losc = clk_register(NULL, &rtc->hw);
        if (IS_ERR(rtc->losc)) {
                pr_crit("Couldn't register the LOSC clock\n");
-               return;
+               goto err_register;
        }
 
        of_property_read_string_index(node, "clock-output-names", 1,
@@ -301,7 +301,7 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
                                          &rtc->lock);
        if (IS_ERR(rtc->ext_losc)) {
                pr_crit("Couldn't register the LOSC external gate\n");
-               return;
+               goto err_register;
        }
 
        clk_data->num = 2;
@@ -314,6 +314,8 @@ static void __init sun6i_rtc_clk_init(struct device_node *node,
        of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data);
        return;
 
+err_register:
+       clk_hw_unregister_fixed_rate(rtc->int_osc);
 err:
        kfree(clk_data);
 }
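The sun6i change turns two silent early returns into proper unwinding: once the internal oscillator is registered, any later clock-registration failure must unregister it (err_register) before freeing clk_data (err), keeping the error labels in reverse order of acquisition. The old code simply returned, leaking both the fixed-rate clk_hw and the clk_data allocation.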
@@ -724,7 +726,7 @@ static int sun6i_rtc_probe(struct platform_device *pdev)
        chip->rtc->ops = &sun6i_rtc_ops;
        chip->rtc->range_max = 2019686399LL; /* 2033-12-31 23:59:59 */
 
-       ret = rtc_register_device(chip->rtc);
+       ret = devm_rtc_register_device(chip->rtc);
        if (ret)
                return ret;
 
index f5d7f44..5d019e3 100644 (file)
@@ -470,7 +470,7 @@ static int sunxi_rtc_probe(struct platform_device *pdev)
 
        chip->rtc->ops = &sunxi_rtc_ops;
 
-       return rtc_register_device(chip->rtc);
+       return devm_rtc_register_device(chip->rtc);
 }
 
 static struct platform_driver sunxi_rtc_driver = {
index 7fbb174..8925015 100644 (file)
@@ -329,7 +329,7 @@ static int tegra_rtc_probe(struct platform_device *pdev)
                goto disable_clk;
        }
 
-       ret = rtc_register_device(info->rtc);
+       ret = devm_rtc_register_device(info->rtc);
        if (ret)
                goto disable_clk;
 
index 74b3a06..7e0d8fb 100644 (file)
@@ -50,7 +50,6 @@ static int test_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        if (expires > U32_MAX)
                expires = U32_MAX;
 
-       pr_err("ABE: %s +%d %s\n", __FILE__, __LINE__, __func__);
        rtd->alarm.expires = expires;
 
        if (alrm->enabled)
@@ -139,7 +138,7 @@ static int test_probe(struct platform_device *plat_dev)
        timer_setup(&rtd->alarm, test_rtc_alarm_handler, 0);
        rtd->alarm.expires = 0;
 
-       return rtc_register_device(rtd->rtc);
+       return devm_rtc_register_device(rtd->rtc);
 }
 
 static struct platform_driver test_driver = {
index e39af2d..a980337 100644 (file)
@@ -280,7 +280,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev)
                goto fail_rtc_register;
        }
 
-       ret = rtc_register_device(rtc->rtc);
+       ret = devm_rtc_register_device(rtc->rtc);
        if (ret)
                goto fail_rtc_register;
 
index e384038..2d87b62 100644 (file)
@@ -434,7 +434,7 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
        tps_rtc->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        tps_rtc->rtc->range_max = RTC_TIMESTAMP_END_2099;
 
-       return rtc_register_device(tps_rtc->rtc);
+       return devm_rtc_register_device(tps_rtc->rtc);
 }
 
 #ifdef CONFIG_PM_SLEEP
index 715b829..c3309db 100644 (file)
@@ -266,17 +266,16 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev)
                return PTR_ERR(rtc);
 
        rtc->ops = &tx4939_rtc_ops;
-       rtc->nvram_old_abi = true;
        rtc->range_max = U32_MAX;
 
        pdata->rtc = rtc;
 
        nvmem_cfg.priv = pdata;
-       ret = rtc_nvmem_register(rtc, &nvmem_cfg);
+       ret = devm_rtc_nvmem_register(rtc, &nvmem_cfg);
        if (ret)
                return ret;
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static int __exit tx4939_rtc_remove(struct platform_device *pdev)
index c367104..5a9f9ad 100644 (file)
@@ -335,7 +335,7 @@ static int rtc_probe(struct platform_device *pdev)
 
        dev_info(&pdev->dev, "Real Time Clock of NEC VR4100 series\n");
 
-       retval = rtc_register_device(rtc);
+       retval = devm_rtc_register_device(rtc);
        if (retval)
                goto err_iounmap_all;
 
index e258862..197b649 100644 (file)
@@ -232,7 +232,7 @@ static int vt8500_rtc_probe(struct platform_device *pdev)
                return ret;
        }
 
-       return rtc_register_device(vt8500_rtc->rtc);
+       return devm_rtc_register_device(vt8500_rtc->rtc);
 }
 
 static int vt8500_rtc_remove(struct platform_device *pdev)
index ff46066..2a205a6 100644 (file)
@@ -176,7 +176,7 @@ static int wilco_ec_rtc_probe(struct platform_device *pdev)
        rtc->range_max = RTC_TIMESTAMP_END_2099;
        rtc->owner = THIS_MODULE;
 
-       return rtc_register_device(rtc);
+       return devm_rtc_register_device(rtc);
 }
 
 static struct platform_driver wilco_ec_rtc_driver = {
index ccef887..640833e 100644 (file)
@@ -429,7 +429,7 @@ static int wm831x_rtc_probe(struct platform_device *pdev)
        wm831x_rtc->rtc->ops = &wm831x_rtc_ops;
        wm831x_rtc->rtc->range_max = U32_MAX;
 
-       ret = rtc_register_device(wm831x_rtc->rtc);
+       ret = devm_rtc_register_device(wm831x_rtc->rtc);
        if (ret)
                return ret;
 
index 96db441..cf68a9b 100644 (file)
@@ -185,7 +185,7 @@ static int xgene_rtc_probe(struct platform_device *pdev)
        pdata->rtc->ops = &xgene_rtc_ops;
        pdata->rtc->range_max = U32_MAX;
 
-       ret = rtc_register_device(pdata->rtc);
+       ret = devm_rtc_register_device(pdata->rtc);
        if (ret) {
                clk_disable_unprepare(pdata->clk);
                return ret;
index 4b1077e..f440bb5 100644 (file)
@@ -264,7 +264,7 @@ static int xlnx_rtc_probe(struct platform_device *pdev)
 
        device_init_wakeup(&pdev->dev, 1);
 
-       return rtc_register_device(xrtcdev->rtc);
+       return devm_rtc_register_device(xrtcdev->rtc);
 }
 
 static int xlnx_rtc_remove(struct platform_device *pdev)
index 950fac0..8a957d3 100644 (file)
@@ -317,8 +317,6 @@ int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps)
        size_t old_cnt = 0, add_cnt = 0, new_cnt;
        const struct attribute_group **groups, **old;
 
-       if (rtc->registered)
-               return -EINVAL;
        if (!grps)
                return -EINVAL;
 
index 36583dc..4b0a7cb 100644 (file)
@@ -1681,7 +1681,7 @@ void ccw_device_wait_idle(struct ccw_device *cdev)
                cio_tsch(sch);
                if (sch->schib.scsw.cmd.actl == 0)
                        break;
-               udelay_simple(100);
+               udelay(100);
        }
 }
 #endif
index 226a561..62ceeb7 100644 (file)
@@ -175,7 +175,7 @@ static int zcrypt_cex2a_queue_probe(struct ap_device *ap_dev)
        atomic_set(&zq->load, 0);
        ap_queue_init_state(aq);
        ap_queue_init_reply(aq, &zq->reply);
-       aq->request_timeout = CEX2A_CLEANUP_TIME,
+       aq->request_timeout = CEX2A_CLEANUP_TIME;
        aq->private = zq;
        rc = zcrypt_queue_register(zq);
        if (rc) {
index f5195bc..f4a6d37 100644 (file)
@@ -631,7 +631,7 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev)
        atomic_set(&zq->load, 0);
        ap_queue_init_state(aq);
        ap_queue_init_reply(aq, &zq->reply);
-       aq->request_timeout = CEX4_CLEANUP_TIME,
+       aq->request_timeout = CEX4_CLEANUP_TIME;
        aq->private = zq;
        rc = zcrypt_queue_register(zq);
        if (rc) {
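The two zcrypt hunks replace a stray comma with the intended semicolon. Because the comma operator sequences both expressions, the old statement still performed both assignments, so this is a readability fix rather than a behavioral one:

	/* parsed as (aq->request_timeout = T), (aq->private = zq);
	 * both assignments execute, but the comma invites misreading */
	aq->request_timeout = CEX4_CLEANUP_TIME,
	aq->private = zq;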
index 10b4be1..4789d36 100644 (file)
@@ -450,9 +450,9 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
                vma_set_anonymous(vma);
        }
 
-       if (vma->vm_file)
-               fput(vma->vm_file);
-       vma->vm_file = asma->file;
+       vma_set_file(vma, asma->file);
+       /* XXX: merge this with the get_file() above if possible */
+       fput(asma->file);
 
 out:
        mutex_unlock(&ashmem_mutex);
index 0966551..823354a 100644 (file)
@@ -584,6 +584,7 @@ static int int3400_thermal_remove(struct platform_device *pdev)
 static const struct acpi_device_id int3400_thermal_match[] = {
        {"INT3400", 0},
        {"INTC1040", 0},
+       {"INTC1041", 0},
        {}
 };
 
index ec1d58c..c3c4c4d 100644 (file)
@@ -284,6 +284,7 @@ static int int3403_remove(struct platform_device *pdev)
 static const struct acpi_device_id int3403_device_ids[] = {
        {"INT3403", 0},
        {"INTC1043", 0},
+       {"INTC1046", 0},
        {"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, int3403_device_ids);
index 6caf539..92a6396 100644 (file)
@@ -9,21 +9,24 @@ menuconfig VDPA
 if VDPA
 
 config VDPA_SIM
-       tristate "vDPA device simulator"
+       tristate "vDPA device simulator core"
        depends on RUNTIME_TESTING_MENU && HAS_DMA
        select DMA_OPS
        select VHOST_RING
+       help
+         Enable this module to support vDPA device simulators. These devices
+         are used for testing, prototyping and development of vDPA.
+
+config VDPA_SIM_NET
+       tristate "vDPA simulator for networking device"
+       depends on VDPA_SIM
        select GENERIC_NET_UTILS
-       default n
        help
-         vDPA networking device simulator which loop TX traffic back
-         to RX. This device is used for testing, prototyping and
-         development of vDPA.
+         vDPA networking device simulator which loops TX traffic back to RX.
 
 config IFCVF
        tristate "Intel IFC VF vDPA driver"
        depends on PCI_MSI
-       default n
        help
          This kernel module can drive Intel IFC VF NIC to offload
          virtio dataplane traffic to hardware.
@@ -42,7 +45,6 @@ config MLX5_VDPA_NET
        tristate "vDPA driver for ConnectX devices"
        select MLX5_VDPA
        depends on MLX5_CORE
-       default n
        help
          VDPA network driver for ConnectX6 and newer. Provides offloading
          of virtio net datapath such that descriptors put on the ring will
index 8b40285..fa1af30 100644 (file)
@@ -417,16 +417,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                return ret;
        }
 
-       ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
        if (ret) {
-               IFCVF_ERR(pdev, "No usable DMA confiugration\n");
-               return ret;
-       }
-
-       ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (ret) {
-               IFCVF_ERR(pdev,
-                         "No usable coherent DMA confiugration\n");
+               IFCVF_ERR(pdev, "No usable DMA configuration\n");
                return ret;
        }
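The ifcvf fix collapses the two legacy PCI DMA-mask calls into one: dma_set_mask_and_coherent() sets the streaming and coherent masks together and fails if either cannot be satisfied, and the duplicated (and misspelled) error message goes with them.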
 
index f1d5481..88dde34 100644 (file)
@@ -479,6 +479,11 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
 {
        mlx5_cq_set_ci(&mvq->cq.mcq);
+
+       /* make sure CQ consumer update is visible to the hardware before updating
+        * RX doorbell record.
+        */
+       dma_wmb();
        rx_post(&mvq->vqqp, num);
        if (mvq->event_cb.callback)
                mvq->event_cb.callback(mvq->event_cb.private);
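The mlx5 fix is about ordering two writes to DMA-coherent memory: the CQ consumer-index update must be observable by the device before the RX doorbell record written next, and dma_wmb() provides exactly that device-visible write ordering, more cheaply than a full wmb(). Generic shape, with hypothetical field names:

	ring->consumer_index = ci;	/* write #1 to coherent memory */
	dma_wmb();			/* device observes #1 before #2 */
	ring->doorbell_record = ci;	/* write #2 to coherent memory */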
index a69ffc9..c082565 100644 (file)
@@ -89,7 +89,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
        if (!vdev)
                goto err;
 
-       err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL);
+       err = ida_alloc(&vdpa_index_ida, GFP_KERNEL);
        if (err < 0)
                goto err_ida;
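ida_simple_get(ida, 0, 0, gfp) is the deprecated spelling of an unbounded ID allocation; ida_alloc(ida, gfp) is the modern equivalent, with ida_alloc_max()/ida_alloc_range() for bounded ranges and ida_free() replacing ida_simple_remove():

	id = ida_alloc(&vdpa_index_ida, GFP_KERNEL);	/* any free ID >= 0 */
	if (id < 0)
		return id;
	/* ... use id ... */
	ida_free(&vdpa_index_ida, id);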
 
index b40278f..79d4536 100644 (file)
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
+obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o
index 6a90fdb..b3fcc67 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * VDPA networking device simulator.
+ * VDPA device simulator core.
  *
  * Copyright (c) 2020, Red Hat Inc. All rights reserved.
  *     Author: Jason Wang <jasowang@redhat.com>
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/poll.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
-#include <linux/wait.h>
-#include <linux/uuid.h>
-#include <linux/iommu.h>
 #include <linux/dma-map-ops.h>
-#include <linux/sysfs.h>
-#include <linux/file.h>
-#include <linux/etherdevice.h>
 #include <linux/vringh.h>
 #include <linux/vdpa.h>
-#include <linux/virtio_byteorder.h>
 #include <linux/vhost_iotlb.h>
-#include <uapi/linux/virtio_config.h>
-#include <uapi/linux/virtio_net.h>
+
+#include "vdpa_sim.h"
 
 #define DRV_VERSION  "0.1"
 #define DRV_AUTHOR   "Jason Wang <jasowang@redhat.com>"
-#define DRV_DESC     "vDPA Device Simulator"
+#define DRV_DESC     "vDPA Device Simulator core"
 #define DRV_LICENSE  "GPL v2"
 
 static int batch_mapping = 1;
 module_param(batch_mapping, int, 0444);
 MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
 
-static char *macaddr;
-module_param(macaddr, charp, 0);
-MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
-
-struct vdpasim_virtqueue {
-       struct vringh vring;
-       struct vringh_kiov iov;
-       unsigned short head;
-       bool ready;
-       u64 desc_addr;
-       u64 device_addr;
-       u64 driver_addr;
-       u32 num;
-       void *private;
-       irqreturn_t (*cb)(void *data);
-};
+static int max_iotlb_entries = 2048;
+module_param(max_iotlb_entries, int, 0444);
+MODULE_PARM_DESC(max_iotlb_entries,
+                "Maximum number of iotlb entries. 0 means unlimited. (default: 2048)");
 
 #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
 #define VDPASIM_QUEUE_MAX 256
-#define VDPASIM_DEVICE_ID 0x1
 #define VDPASIM_VENDOR_ID 0
-#define VDPASIM_VQ_NUM 0x2
-#define VDPASIM_NAME "vdpasim-netdev"
-
-static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) |
-                             (1ULL << VIRTIO_F_VERSION_1)  |
-                             (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
-                             (1ULL << VIRTIO_NET_F_MAC);
-
-/* State of each vdpasim device */
-struct vdpasim {
-       struct vdpa_device vdpa;
-       struct vdpasim_virtqueue vqs[VDPASIM_VQ_NUM];
-       struct work_struct work;
-       /* spinlock to synchronize virtqueue state */
-       spinlock_t lock;
-       struct virtio_net_config config;
-       struct vhost_iotlb *iommu;
-       void *buffer;
-       u32 status;
-       u32 generation;
-       u64 features;
-       /* spinlock to synchronize iommu table */
-       spinlock_t iommu_lock;
-};
-
-/* TODO: cross-endian support */
-static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
-{
-       return virtio_legacy_is_little_endian() ||
-               (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1));
-}
-
-static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val)
-{
-       return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val);
-}
-
-static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val)
-{
-       return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val);
-}
-
-static struct vdpasim *vdpasim_dev;
 
 static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
 {
@@ -115,20 +50,34 @@ static struct vdpasim *dev_to_sim(struct device *dev)
        return vdpa_to_sim(vdpa);
 }
 
+static void vdpasim_vq_notify(struct vringh *vring)
+{
+       struct vdpasim_virtqueue *vq =
+               container_of(vring, struct vdpasim_virtqueue, vring);
+
+       if (!vq->cb)
+               return;
+
+       vq->cb(vq->private);
+}
+
 static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
 {
        struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
 
-       vringh_init_iotlb(&vq->vring, vdpasim_features,
+       vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
                          VDPASIM_QUEUE_MAX, false,
                          (struct vring_desc *)(uintptr_t)vq->desc_addr,
                          (struct vring_avail *)
                          (uintptr_t)vq->driver_addr,
                          (struct vring_used *)
                          (uintptr_t)vq->device_addr);
+
+       vq->vring.notify = vdpasim_vq_notify;
 }
 
-static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq)
+static void vdpasim_vq_reset(struct vdpasim *vdpasim,
+                            struct vdpasim_virtqueue *vq)
 {
        vq->ready = false;
        vq->desc_addr = 0;
@@ -136,16 +85,18 @@ static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq)
        vq->device_addr = 0;
        vq->cb = NULL;
        vq->private = NULL;
-       vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX,
-                         false, NULL, NULL, NULL);
+       vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features,
+                         VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL);
+
+       vq->vring.notify = NULL;
 }
 
 static void vdpasim_reset(struct vdpasim *vdpasim)
 {
        int i;
 
-       for (i = 0; i < VDPASIM_VQ_NUM; i++)
-               vdpasim_vq_reset(&vdpasim->vqs[i]);
+       for (i = 0; i < vdpasim->dev_attr.nvqs; i++)
+               vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]);
 
        spin_lock(&vdpasim->iommu_lock);
        vhost_iotlb_reset(vdpasim->iommu);
@@ -156,80 +107,6 @@ static void vdpasim_reset(struct vdpasim *vdpasim)
        ++vdpasim->generation;
 }
 
-static void vdpasim_work(struct work_struct *work)
-{
-       struct vdpasim *vdpasim = container_of(work, struct
-                                                vdpasim, work);
-       struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
-       struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
-       ssize_t read, write;
-       size_t total_write;
-       int pkts = 0;
-       int err;
-
-       spin_lock(&vdpasim->lock);
-
-       if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
-               goto out;
-
-       if (!txq->ready || !rxq->ready)
-               goto out;
-
-       while (true) {
-               total_write = 0;
-               err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL,
-                                          &txq->head, GFP_ATOMIC);
-               if (err <= 0)
-                       break;
-
-               err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov,
-                                          &rxq->head, GFP_ATOMIC);
-               if (err <= 0) {
-                       vringh_complete_iotlb(&txq->vring, txq->head, 0);
-                       break;
-               }
-
-               while (true) {
-                       read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov,
-                                                    vdpasim->buffer,
-                                                    PAGE_SIZE);
-                       if (read <= 0)
-                               break;
-
-                       write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov,
-                                                     vdpasim->buffer, read);
-                       if (write <= 0)
-                               break;
-
-                       total_write += write;
-               }
-
-               /* Make sure data is wrote before advancing index */
-               smp_wmb();
-
-               vringh_complete_iotlb(&txq->vring, txq->head, 0);
-               vringh_complete_iotlb(&rxq->vring, rxq->head, total_write);
-
-               /* Make sure used is visible before rasing the interrupt. */
-               smp_wmb();
-
-               local_bh_disable();
-               if (txq->cb)
-                       txq->cb(txq->private);
-               if (rxq->cb)
-                       rxq->cb(rxq->private);
-               local_bh_enable();
-
-               if (++pkts > 4) {
-                       schedule_work(&vdpasim->work);
-                       goto out;
-               }
-       }
-
-out:
-       spin_unlock(&vdpasim->lock);
-}
-
 static int dir_to_perm(enum dma_data_direction dir)
 {
        int perm = -EFAULT;
@@ -342,26 +219,28 @@ static const struct dma_map_ops vdpasim_dma_ops = {
        .free = vdpasim_free_coherent,
 };
 
-static const struct vdpa_config_ops vdpasim_net_config_ops;
-static const struct vdpa_config_ops vdpasim_net_batch_config_ops;
+static const struct vdpa_config_ops vdpasim_config_ops;
+static const struct vdpa_config_ops vdpasim_batch_config_ops;
 
-static struct vdpasim *vdpasim_create(void)
+struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
 {
        const struct vdpa_config_ops *ops;
        struct vdpasim *vdpasim;
        struct device *dev;
-       int ret = -ENOMEM;
+       int i, ret = -ENOMEM;
 
        if (batch_mapping)
-               ops = &vdpasim_net_batch_config_ops;
+               ops = &vdpasim_batch_config_ops;
        else
-               ops = &vdpasim_net_config_ops;
+               ops = &vdpasim_config_ops;
 
-       vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM);
+       vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
+                                   dev_attr->nvqs);
        if (!vdpasim)
                goto err_alloc;
 
-       INIT_WORK(&vdpasim->work, vdpasim_work);
+       vdpasim->dev_attr = *dev_attr;
+       INIT_WORK(&vdpasim->work, dev_attr->work_fn);
        spin_lock_init(&vdpasim->lock);
        spin_lock_init(&vdpasim->iommu_lock);
 
@@ -371,31 +250,27 @@ static struct vdpasim *vdpasim_create(void)
                goto err_iommu;
        set_dma_ops(dev, &vdpasim_dma_ops);
 
-       vdpasim->iommu = vhost_iotlb_alloc(2048, 0);
+       vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
+       if (!vdpasim->config)
+               goto err_iommu;
+
+       vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue),
+                              GFP_KERNEL);
+       if (!vdpasim->vqs)
+               goto err_iommu;
+
+       vdpasim->iommu = vhost_iotlb_alloc(max_iotlb_entries, 0);
        if (!vdpasim->iommu)
                goto err_iommu;
 
-       vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
        if (!vdpasim->buffer)
                goto err_iommu;
 
-       if (macaddr) {
-               mac_pton(macaddr, vdpasim->config.mac);
-               if (!is_valid_ether_addr(vdpasim->config.mac)) {
-                       ret = -EADDRNOTAVAIL;
-                       goto err_iommu;
-               }
-       } else {
-               eth_random_addr(vdpasim->config.mac);
-       }
-
-       vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu);
-       vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu);
+       for (i = 0; i < dev_attr->nvqs; i++)
+               vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu);
 
        vdpasim->vdpa.dma_dev = dev;
-       ret = vdpa_register_device(&vdpasim->vdpa);
-       if (ret)
-               goto err_iommu;
 
        return vdpasim;
 
@@ -404,6 +279,7 @@ err_iommu:
 err_alloc:
        return ERR_PTR(ret);
 }
+EXPORT_SYMBOL_GPL(vdpasim_create);
 
 static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx,
                                  u64 desc_area, u64 driver_area,
@@ -498,28 +374,21 @@ static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa)
 
 static u64 vdpasim_get_features(struct vdpa_device *vdpa)
 {
-       return vdpasim_features;
+       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+       return vdpasim->dev_attr.supported_features;
 }
 
 static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features)
 {
        struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
-       struct virtio_net_config *config = &vdpasim->config;
 
        /* DMA mapping must be done by driver */
        if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
                return -EINVAL;
 
-       vdpasim->features = features & vdpasim_features;
-
-       /* We generally only know whether guest is using the legacy interface
-        * here, so generally that's the earliest we can set config fields.
-        * Note: We actually require VIRTIO_F_ACCESS_PLATFORM above which
-        * implies VIRTIO_F_VERSION_1, but let's not try to be clever here.
-        */
+       vdpasim->features = features & vdpasim->dev_attr.supported_features;
 
-       config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
-       config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
        return 0;
 }
 
@@ -536,7 +405,9 @@ static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa)
 
 static u32 vdpasim_get_device_id(struct vdpa_device *vdpa)
 {
-       return VDPASIM_DEVICE_ID;
+       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+       return vdpasim->dev_attr.id;
 }
 
 static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa)
@@ -572,14 +443,27 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset,
 {
        struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
 
-       if (offset + len < sizeof(struct virtio_net_config))
-               memcpy(buf, (u8 *)&vdpasim->config + offset, len);
+       if (offset + len > vdpasim->dev_attr.config_size)
+               return;
+
+       if (vdpasim->dev_attr.get_config)
+               vdpasim->dev_attr.get_config(vdpasim, vdpasim->config);
+
+       memcpy(buf, vdpasim->config + offset, len);
 }
 
 static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset,
                             const void *buf, unsigned int len)
 {
-       /* No writable config supportted by vdpasim */
+       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+       if (offset + len > vdpasim->dev_attr.config_size)
+               return;
+
+       memcpy(vdpasim->config + offset, buf, len);
+
+       if (vdpasim->dev_attr.set_config)
+               vdpasim->dev_attr.set_config(vdpasim, vdpasim->config);
 }
 
 static u32 vdpasim_get_generation(struct vdpa_device *vdpa)
@@ -656,12 +540,14 @@ static void vdpasim_free(struct vdpa_device *vdpa)
        struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
 
        cancel_work_sync(&vdpasim->work);
-       kfree(vdpasim->buffer);
+       kvfree(vdpasim->buffer);
        if (vdpasim->iommu)
                vhost_iotlb_free(vdpasim->iommu);
+       kfree(vdpasim->vqs);
+       kfree(vdpasim->config);
 }
 
-static const struct vdpa_config_ops vdpasim_net_config_ops = {
+static const struct vdpa_config_ops vdpasim_config_ops = {
        .set_vq_address         = vdpasim_set_vq_address,
        .set_vq_num             = vdpasim_set_vq_num,
        .kick_vq                = vdpasim_kick_vq,
@@ -688,7 +574,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = {
        .free                   = vdpasim_free,
 };
 
-static const struct vdpa_config_ops vdpasim_net_batch_config_ops = {
+static const struct vdpa_config_ops vdpasim_batch_config_ops = {
        .set_vq_address         = vdpasim_set_vq_address,
        .set_vq_num             = vdpasim_set_vq_num,
        .kick_vq                = vdpasim_kick_vq,
@@ -714,26 +600,6 @@ static const struct vdpa_config_ops vdpasim_net_batch_config_ops = {
        .free                   = vdpasim_free,
 };
 
-static int __init vdpasim_dev_init(void)
-{
-       vdpasim_dev = vdpasim_create();
-
-       if (!IS_ERR(vdpasim_dev))
-               return 0;
-
-       return PTR_ERR(vdpasim_dev);
-}
-
-static void __exit vdpasim_dev_exit(void)
-{
-       struct vdpa_device *vdpa = &vdpasim_dev->vdpa;
-
-       vdpa_unregister_device(vdpa);
-}
-
-module_init(vdpasim_dev_init)
-module_exit(vdpasim_dev_exit)
-
 MODULE_VERSION(DRV_VERSION);
 MODULE_LICENSE(DRV_LICENSE);
 MODULE_AUTHOR(DRV_AUTHOR);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
new file mode 100644 (file)
index 0000000..b021422
--- /dev/null
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ */
+
+#ifndef _VDPA_SIM_H
+#define _VDPA_SIM_H
+
+#include <linux/vringh.h>
+#include <linux/vdpa.h>
+#include <linux/virtio_byteorder.h>
+#include <linux/vhost_iotlb.h>
+#include <uapi/linux/virtio_config.h>
+
+#define VDPASIM_FEATURES       ((1ULL << VIRTIO_F_ANY_LAYOUT) | \
+                                (1ULL << VIRTIO_F_VERSION_1)  | \
+                                (1ULL << VIRTIO_F_ACCESS_PLATFORM))
+
+struct vdpasim;
+
+struct vdpasim_virtqueue {
+       struct vringh vring;
+       struct vringh_kiov in_iov;
+       struct vringh_kiov out_iov;
+       unsigned short head;
+       bool ready;
+       u64 desc_addr;
+       u64 device_addr;
+       u64 driver_addr;
+       u32 num;
+       void *private;
+       irqreturn_t (*cb)(void *data);
+};
+
+struct vdpasim_dev_attr {
+       u64 supported_features;
+       size_t config_size;
+       size_t buffer_size;
+       int nvqs;
+       u32 id;
+
+       work_func_t work_fn;
+       void (*get_config)(struct vdpasim *vdpasim, void *config);
+       void (*set_config)(struct vdpasim *vdpasim, const void *config);
+};
+
+/* State of each vdpasim device */
+struct vdpasim {
+       struct vdpa_device vdpa;
+       struct vdpasim_virtqueue *vqs;
+       struct work_struct work;
+       struct vdpasim_dev_attr dev_attr;
+       /* spinlock to synchronize virtqueue state */
+       spinlock_t lock;
+       /* virtio config according to device type */
+       void *config;
+       struct vhost_iotlb *iommu;
+       void *buffer;
+       u32 status;
+       u32 generation;
+       u64 features;
+       /* spinlock to synchronize iommu table */
+       spinlock_t iommu_lock;
+};
+
+struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr);
+
+/* TODO: cross-endian support */
+static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim)
+{
+       return virtio_legacy_is_little_endian() ||
+               (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1));
+}
+
+static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val)
+{
+       return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val);
+}
+
+static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val)
+{
+       return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val);
+}
+
+static inline u32 vdpasim32_to_cpu(struct vdpasim *vdpasim, __virtio32 val)
+{
+       return __virtio32_to_cpu(vdpasim_is_little_endian(vdpasim), val);
+}
+
+static inline __virtio32 cpu_to_vdpasim32(struct vdpasim *vdpasim, u32 val)
+{
+       return __cpu_to_virtio32(vdpasim_is_little_endian(vdpasim), val);
+}
+
+static inline u64 vdpasim64_to_cpu(struct vdpasim *vdpasim, __virtio64 val)
+{
+       return __virtio64_to_cpu(vdpasim_is_little_endian(vdpasim), val);
+}
+
+static inline __virtio64 cpu_to_vdpasim64(struct vdpasim *vdpasim, u64 val)
+{
+       return __cpu_to_virtio64(vdpasim_is_little_endian(vdpasim), val);
+}
+
+#endif
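
A note on the byteorder helpers above: device code is expected to go through them for every multi-byte config field, since a guest that did not negotiate VIRTIO_F_VERSION_1 may be using the legacy native-endian layout. A minimal sketch of a set_config callback consuming a 16-bit field this way (the callback name is hypothetical; virtio_net_config is only borrowed for illustration):

        static void vdpasim_foo_set_config(struct vdpasim *vdpasim,
                                           const void *config)
        {
                const struct virtio_net_config *cfg = config;
                u16 status;

                /* convert from the guest-visible layout before use */
                status = vdpasim16_to_cpu(vdpasim, cfg->status);
                (void)status;   /* a real device would act on it */
        }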
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
new file mode 100644 (file)
index 0000000..c10b698
--- /dev/null
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VDPA simulator for networking device.
+ *
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ *     Author: Jason Wang <jasowang@redhat.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/etherdevice.h>
+#include <linux/vringh.h>
+#include <linux/vdpa.h>
+#include <uapi/linux/virtio_net.h>
+
+#include "vdpa_sim.h"
+
+#define DRV_VERSION  "0.1"
+#define DRV_AUTHOR   "Jason Wang <jasowang@redhat.com>"
+#define DRV_DESC     "vDPA Device Simulator for networking device"
+#define DRV_LICENSE  "GPL v2"
+
+#define VDPASIM_NET_FEATURES   (VDPASIM_FEATURES | \
+                                (1ULL << VIRTIO_NET_F_MAC))
+
+#define VDPASIM_NET_VQ_NUM     2
+
+static char *macaddr;
+module_param(macaddr, charp, 0);
+MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
+
+static u8 macaddr_buf[ETH_ALEN];
+
+static struct vdpasim *vdpasim_net_dev;
+
+static void vdpasim_net_work(struct work_struct *work)
+{
+       struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
+       struct vdpasim_virtqueue *txq = &vdpasim->vqs[1];
+       struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0];
+       ssize_t read, write;
+       size_t total_write;
+       int pkts = 0;
+       int err;
+
+       spin_lock(&vdpasim->lock);
+
+       if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
+               goto out;
+
+       if (!txq->ready || !rxq->ready)
+               goto out;
+
+       while (true) {
+               total_write = 0;
+               err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL,
+                                          &txq->head, GFP_ATOMIC);
+               if (err <= 0)
+                       break;
+
+               err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov,
+                                          &rxq->head, GFP_ATOMIC);
+               if (err <= 0) {
+                       vringh_complete_iotlb(&txq->vring, txq->head, 0);
+                       break;
+               }
+
+               while (true) {
+                       read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov,
+                                                    vdpasim->buffer,
+                                                    PAGE_SIZE);
+                       if (read <= 0)
+                               break;
+
+                       write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov,
+                                                     vdpasim->buffer, read);
+                       if (write <= 0)
+                               break;
+
+                       total_write += write;
+               }
+
+               /* Make sure data is written before advancing the index */
+               smp_wmb();
+
+               vringh_complete_iotlb(&txq->vring, txq->head, 0);
+               vringh_complete_iotlb(&rxq->vring, rxq->head, total_write);
+
+               /* Make sure used is visible before raising the interrupt. */
+               smp_wmb();
+
+               local_bh_disable();
+               if (vringh_need_notify_iotlb(&txq->vring) > 0)
+                       vringh_notify(&txq->vring);
+               if (vringh_need_notify_iotlb(&rxq->vring) > 0)
+                       vringh_notify(&rxq->vring);
+               local_bh_enable();
+
+               if (++pkts > 4) {
+                       schedule_work(&vdpasim->work);
+                       goto out;
+               }
+       }
+
+out:
+       spin_unlock(&vdpasim->lock);
+}
+
+static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
+{
+       struct virtio_net_config *net_config =
+               (struct virtio_net_config *)config;
+
+       net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
+       net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
+       memcpy(net_config->mac, macaddr_buf, ETH_ALEN);
+}
+
+static int __init vdpasim_net_init(void)
+{
+       struct vdpasim_dev_attr dev_attr = {};
+       int ret;
+
+       if (macaddr) {
+               mac_pton(macaddr, macaddr_buf);
+               if (!is_valid_ether_addr(macaddr_buf)) {
+                       ret = -EADDRNOTAVAIL;
+                       goto out;
+               }
+       } else {
+               eth_random_addr(macaddr_buf);
+       }
+
+       dev_attr.id = VIRTIO_ID_NET;
+       dev_attr.supported_features = VDPASIM_NET_FEATURES;
+       dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
+       dev_attr.config_size = sizeof(struct virtio_net_config);
+       dev_attr.get_config = vdpasim_net_get_config;
+       dev_attr.work_fn = vdpasim_net_work;
+       dev_attr.buffer_size = PAGE_SIZE;
+
+       vdpasim_net_dev = vdpasim_create(&dev_attr);
+       if (IS_ERR(vdpasim_net_dev)) {
+               ret = PTR_ERR(vdpasim_net_dev);
+               goto out;
+       }
+
+       ret = vdpa_register_device(&vdpasim_net_dev->vdpa);
+       if (ret)
+               goto put_dev;
+
+       return 0;
+
+put_dev:
+       put_device(&vdpasim_net_dev->vdpa.dev);
+out:
+       return ret;
+}
+
+static void __exit vdpasim_net_exit(void)
+{
+       struct vdpa_device *vdpa = &vdpasim_net_dev->vdpa;
+
+       vdpa_unregister_device(vdpa);
+}
+
+module_init(vdpasim_net_init);
+module_exit(vdpasim_net_exit);
+
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE(DRV_LICENSE);
+MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_DESCRIPTION(DRV_DESC);
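
The point of splitting the core from vdpa_sim_net.c is that additional device simulators now only need to fill in a vdpasim_dev_attr and register the result. As a hedged illustration, a hypothetical block-device simulator's init would mirror vdpasim_net_init() above (VIRTIO_ID_BLOCK and virtio_blk_config are standard virtio definitions; vdpasim_blk_work and the other names are invented for the sketch):

        static struct vdpasim *vdpasim_blk_dev;

        static int __init vdpasim_blk_init(void)
        {
                struct vdpasim_dev_attr dev_attr = {};
                int ret;

                dev_attr.id = VIRTIO_ID_BLOCK;
                dev_attr.supported_features = VDPASIM_FEATURES;
                dev_attr.nvqs = 1;
                dev_attr.config_size = sizeof(struct virtio_blk_config);
                dev_attr.work_fn = vdpasim_blk_work;    /* device-specific */
                dev_attr.buffer_size = PAGE_SIZE;

                vdpasim_blk_dev = vdpasim_create(&dev_attr);
                if (IS_ERR(vdpasim_blk_dev))
                        return PTR_ERR(vdpasim_blk_dev);

                ret = vdpa_register_device(&vdpasim_blk_dev->vdpa);
                if (ret)
                        put_device(&vdpasim_blk_dev->vdpa.dev);
                return ret;
        }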
index 997cb5d..414e98d 100644 (file)
@@ -46,6 +46,9 @@ static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void
        __poll_t flags = key_to_poll(key);
 
        if (flags & EPOLLIN) {
+               u64 cnt;
+               eventfd_ctx_do_read(virqfd->eventfd, &cnt);
+
                /* An event has been signaled, call function */
                if ((!virqfd->handler ||
                     virqfd->handler(virqfd->opaque, virqfd->data)) &&
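
Context for the hunk above: an eventfd keeps polling readable for as long as its counter is non-zero, so a wait-queue callback like virqfd_wakeup() would be woken again and again until someone read the fd. Consuming the counter with eventfd_ctx_do_read() right in the wakeup path prevents that. The two halves of the pattern, as a sketch (ctx stands for any struct eventfd_ctx pointer):

        u64 cnt;

        /* producer: add 1 to the counter, waking pollers */
        eventfd_signal(ctx, 1);

        /* consumer, e.g. in the wakeup callback: fetch and reset the
         * counter so the eventfd stops polling readable */
        eventfd_ctx_do_read(ctx, &cnt);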
index 6ff8a50..4ce9f00 100644 (file)
@@ -1643,7 +1643,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
                        if (!vhost_vq_is_setup(vq))
                                continue;
 
-                       if (vhost_scsi_setup_vq_cmds(vq, vq->num))
+                       ret = vhost_scsi_setup_vq_cmds(vq, vq->num);
+                       if (ret)
                                goto destroy_vq_cmds;
                }
 
index 29ed417..ef688c8 100644 (file)
@@ -245,14 +245,10 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
                return -EFAULT;
        if (vhost_vdpa_config_validate(v, &config))
                return -EINVAL;
-       buf = kvzalloc(config.len, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
 
-       if (copy_from_user(buf, c->buf, config.len)) {
-               kvfree(buf);
-               return -EFAULT;
-       }
+       buf = vmemdup_user(c->buf, config.len);
+       if (IS_ERR(buf))
+               return PTR_ERR(buf);
 
        ops->set_config(vdpa, config.off, buf, config.len);
 
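
vmemdup_user(), used above, bundles the allocation and the copy from user space into one call and reports failure as an ERR_PTR() instead of NULL, which is why the error handling collapses into a single IS_ERR() check. The general shape of the pattern (the buffer is still released with kvfree() once consumed):

        void *buf;

        buf = vmemdup_user(uptr, len);  /* kvmalloc() + copy_from_user() */
        if (IS_ERR(buf))
                return PTR_ERR(buf);

        /* ... consume buf ... */
        kvfree(buf);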
index b3a041f..32baaf5 100644 (file)
@@ -682,6 +682,7 @@ static void lx_restore_display_ctlr(struct lxfb_par *par)
                case DC_DV_CTL:
                        /* set all ram to dirty */
                        write_dc(par, i, par->dc[i] | DC_DV_CTL_CLEAR_DV_RAM);
+                       break;
 
                case DC_RSVD_1:
                case DC_RSVD_2:
index 0642555..27893fa 100644 (file)
@@ -239,6 +239,7 @@ static u32 to3264(u32 timing, int bpp, int is64)
                fallthrough;
        case 16:
                timing >>= 1;
+               fallthrough;
        case 32:
                break;
        }
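
The two framebuffer hunks above fix the same class of bug from opposite directions: lx_restore_display_ctlr() was unintentionally falling out of the DC_DV_CTL case into the reserved-register cases (a missing break), while to3264() falls through on purpose and now says so with the kernel's fallthrough pseudo-keyword, which keeps -Wimplicit-fallthrough quiet. The fixed to3264() switch, for reference:

        switch (bpp) {
        case 8:
                timing >>= 1;
                fallthrough;
        case 16:
                timing >>= 1;
                fallthrough;    /* the annotation added here */
        case 32:
                break;
        }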
index 181e2f1..9fc9ec4 100644 (file)
@@ -27,20 +27,74 @@ static bool unplug_online = true;
 module_param(unplug_online, bool, 0644);
 MODULE_PARM_DESC(unplug_online, "Try to unplug online memory");
 
-enum virtio_mem_mb_state {
+static bool force_bbm;
+module_param(force_bbm, bool, 0444);
+MODULE_PARM_DESC(force_bbm,
+               "Force Big Block Mode. Default is 0 (auto-selection)");
+
+static unsigned long bbm_block_size;
+module_param(bbm_block_size, ulong, 0444);
+MODULE_PARM_DESC(bbm_block_size,
+                "Big Block size in bytes. Default is 0 (auto-detection).");
+
+static bool bbm_safe_unplug = true;
+module_param(bbm_safe_unplug, bool, 0444);
+MODULE_PARM_DESC(bbm_safe_unplug,
+            "Use a safe unplug mechanism in BBM, avoiding long/endless loops");
+
+/*
+ * virtio-mem currently supports the following modes of operation:
+ *
+ * * Sub Block Mode (SBM): A Linux memory block spans 2..X subblocks (SB). The
+ *   size of a subblock is determined based on the device block size, the
+ *   pageblock size, and the maximum allocation granularity of the buddy.
+ *   Subblocks within a Linux memory block might either be plugged or unplugged.
+ *   Memory is added to/removed from Linux MM in Linux memory block granularity.
+ *
+ * * Big Block Mode (BBM): A Big Block (BB) spans 1..X Linux memory blocks.
+ *   Memory is added to/removed from Linux MM in Big Block granularity.
+ *
+ * The mode is determined automatically based on the Linux memory block size
+ * and the device block size.
+ *
+ * User space / core MM (auto onlining) is responsible for onlining added
+ * Linux memory blocks - and for selecting a zone. Linux memory blocks are
+ * always onlined separately, and all memory within a Linux memory block is
+ * onlined to the same zone - virtio-mem relies on this behavior.
+ */
+
+/*
+ * State of a Linux memory block in SBM.
+ */
+enum virtio_mem_sbm_mb_state {
        /* Unplugged, not added to Linux. Can be reused later. */
-       VIRTIO_MEM_MB_STATE_UNUSED = 0,
+       VIRTIO_MEM_SBM_MB_UNUSED = 0,
        /* (Partially) plugged, not added to Linux. Error on add_memory(). */
-       VIRTIO_MEM_MB_STATE_PLUGGED,
+       VIRTIO_MEM_SBM_MB_PLUGGED,
        /* Fully plugged, fully added to Linux, offline. */
-       VIRTIO_MEM_MB_STATE_OFFLINE,
+       VIRTIO_MEM_SBM_MB_OFFLINE,
        /* Partially plugged, fully added to Linux, offline. */
-       VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL,
+       VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
        /* Fully plugged, fully added to Linux, online. */
-       VIRTIO_MEM_MB_STATE_ONLINE,
+       VIRTIO_MEM_SBM_MB_ONLINE,
        /* Partially plugged, fully added to Linux, online. */
-       VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL,
-       VIRTIO_MEM_MB_STATE_COUNT
+       VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL,
+       VIRTIO_MEM_SBM_MB_COUNT
+};
+
+/*
+ * State of a Big Block (BB) in BBM, covering 1..X Linux memory blocks.
+ */
+enum virtio_mem_bbm_bb_state {
+       /* Unplugged, not added to Linux. Can be reused later. */
+       VIRTIO_MEM_BBM_BB_UNUSED = 0,
+       /* Plugged, not added to Linux. Error on add_memory(). */
+       VIRTIO_MEM_BBM_BB_PLUGGED,
+       /* Plugged and added to Linux. */
+       VIRTIO_MEM_BBM_BB_ADDED,
+       /* All online parts are fake-offline, ready to remove. */
+       VIRTIO_MEM_BBM_BB_FAKE_OFFLINE,
+       VIRTIO_MEM_BBM_BB_COUNT
 };
 
 struct virtio_mem {
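
The automatic mode selection mentioned in the comment block above is not part of this hunk, but it boils down to comparing block sizes at probe time. A hedged sketch of the decision (virtio_mem_wants_sbm() is a hypothetical name; force_bbm is the module parameter introduced above):

        static bool virtio_mem_wants_sbm(struct virtio_mem *vm)
        {
                if (force_bbm)
                        return false;
                /* SBM only works when a device block fits within a
                 * single Linux memory block; otherwise fall back to BBM */
                return vm->device_block_size <= memory_block_size_bytes();
        }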
@@ -51,6 +105,7 @@ struct virtio_mem {
 
        /* Workqueue that processes the plug/unplug requests. */
        struct work_struct wq;
+       atomic_t wq_active;
        atomic_t config_changed;
 
        /* Virtqueue for guest->host requests. */
@@ -70,27 +125,13 @@ struct virtio_mem {
 
        /* The device block size (for communicating with the device). */
        uint64_t device_block_size;
-       /* The translated node id. NUMA_NO_NODE in case not specified. */
+       /* The determined node id for all memory of the device. */
        int nid;
        /* Physical start address of the memory region. */
        uint64_t addr;
        /* Maximum region size in bytes. */
        uint64_t region_size;
 
-       /* The subblock size. */
-       uint64_t subblock_size;
-       /* The number of subblocks per memory block. */
-       uint32_t nb_sb_per_mb;
-
-       /* Id of the first memory block of this device. */
-       unsigned long first_mb_id;
-       /* Id of the last memory block of this device. */
-       unsigned long last_mb_id;
-       /* Id of the last usable memory block of this device. */
-       unsigned long last_usable_mb_id;
-       /* Id of the next memory bock to prepare when needed. */
-       unsigned long next_mb_id;
-
        /* The parent resource for all memory added via this device. */
        struct resource *parent_resource;
        /*
@@ -99,31 +140,79 @@ struct virtio_mem {
         */
        const char *resource_name;
 
-       /* Summary of all memory block states. */
-       unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT];
-#define VIRTIO_MEM_NB_OFFLINE_THRESHOLD                10
-
-       /*
-        * One byte state per memory block.
-        *
-        * Allocated via vmalloc(). When preparing new blocks, resized
-        * (alloc+copy+free) when needed (crossing pages with the next mb).
-        * (when crossing pages).
-        *
-        * With 128MB memory blocks, we have states for 512GB of memory in one
-        * page.
-        */
-       uint8_t *mb_state;
-
        /*
-        * $nb_sb_per_mb bit per memory block. Handled similar to mb_state.
-        *
-        * With 4MB subblocks, we manage 128GB of memory in one page.
+        * We don't want to add too much memory if it's not getting onlined,
+        * to avoid running OOM. Besides this threshold, we allow at least
+        * two offline blocks at a time (whichever is bigger).
         */
-       unsigned long *sb_bitmap;
+#define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD           (1024 * 1024 * 1024)
+       atomic64_t offline_size;
+       uint64_t offline_threshold;
+
+       /* If set, the driver is in SBM, otherwise in BBM. */
+       bool in_sbm;
+
+       union {
+               struct {
+                       /* Id of the first memory block of this device. */
+                       unsigned long first_mb_id;
+                       /* Id of the last usable memory block of this device. */
+                       unsigned long last_usable_mb_id;
+                       /* Id of the next memory block to prepare when needed. */
+                       unsigned long next_mb_id;
+
+                       /* The subblock size. */
+                       uint64_t sb_size;
+                       /* The number of subblocks per Linux memory block. */
+                       uint32_t sbs_per_mb;
+
+                       /* Summary of all memory block states. */
+                       unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT];
+
+                       /*
+                        * One byte state per memory block. Allocated via
+                        * vmalloc(). Resized (alloc+copy+free) on demand.
+                        *
+                        * With 128 MiB memory blocks, we have states for 512
+                        * GiB of memory in one 4 KiB page.
+                        */
+                       uint8_t *mb_states;
+
+                       /*
+                        * Bitmap: one bit per subblock. Allocated similar to
+                        * sbm.mb_states.
+                        *
+                        * A set bit means the corresponding subblock is
+                        * plugged, otherwise it's unblocked.
+                        *
+                        * With 4 MiB subblocks, we manage 128 GiB of memory
+                        * in one 4 KiB page.
+                        */
+                       unsigned long *sb_states;
+               } sbm;
+
+               struct {
+                       /* Id of the first big block of this device. */
+                       unsigned long first_bb_id;
+                       /* Id of the last usable big block of this device. */
+                       unsigned long last_usable_bb_id;
+                       /* Id of the next big block to prepare when needed. */
+                       unsigned long next_bb_id;
+
+                       /* Summary of all big block states. */
+                       unsigned long bb_count[VIRTIO_MEM_BBM_BB_COUNT];
+
+                       /* One byte state per big block. See sbm.mb_states. */
+                       uint8_t *bb_states;
+
+                       /* The block size used for plugging/adding/removing. */
+                       uint64_t bb_size;
+               } bbm;
+       };
 
        /*
-        * Mutex that protects the nb_mb_state, mb_state, and sb_bitmap.
+        * Mutex that protects the sbm.mb_count, sbm.mb_states,
+        * sbm.sb_states, bbm.bb_count, and bbm.bb_states.
         *
         * When this lock is held the pointers can't change, ONLINE and
         * OFFLINE blocks can't change the state and no subblocks will get
@@ -160,6 +249,11 @@ static DEFINE_MUTEX(virtio_mem_mutex);
 static LIST_HEAD(virtio_mem_devices);
 
 static void virtio_mem_online_page_cb(struct page *page, unsigned int order);
+static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
+                                                 unsigned long nr_pages);
+static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
+                                                  unsigned long nr_pages);
+static void virtio_mem_retry(struct virtio_mem *vm);
 
 /*
  * Register a virtio-mem device so it will be considered for the online_page
@@ -213,6 +307,24 @@ static unsigned long virtio_mem_mb_id_to_phys(unsigned long mb_id)
 }
 
 /*
+ * Calculate the big block id of a given address.
+ */
+static unsigned long virtio_mem_phys_to_bb_id(struct virtio_mem *vm,
+                                             uint64_t addr)
+{
+       return addr / vm->bbm.bb_size;
+}
+
+/*
+ * Calculate the physical start address of a given big block id.
+ */
+static uint64_t virtio_mem_bb_id_to_phys(struct virtio_mem *vm,
+                                        unsigned long bb_id)
+{
+       return bb_id * vm->bbm.bb_size;
+}
+
+/*
  * Calculate the subblock id of a given address.
  */
 static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm,
@@ -221,89 +333,164 @@ static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm,
        const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr);
        const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id);
 
-       return (addr - mb_addr) / vm->subblock_size;
+       return (addr - mb_addr) / vm->sbm.sb_size;
 }
 
 /*
+ * Set the state of a big block, taking care of the state counter.
+ */
+static void virtio_mem_bbm_set_bb_state(struct virtio_mem *vm,
+                                       unsigned long bb_id,
+                                       enum virtio_mem_bbm_bb_state state)
+{
+       const unsigned long idx = bb_id - vm->bbm.first_bb_id;
+       enum virtio_mem_bbm_bb_state old_state;
+
+       old_state = vm->bbm.bb_states[idx];
+       vm->bbm.bb_states[idx] = state;
+
+       BUG_ON(vm->bbm.bb_count[old_state] == 0);
+       vm->bbm.bb_count[old_state]--;
+       vm->bbm.bb_count[state]++;
+}
+
+/*
+ * Get the state of a big block.
+ */
+static enum virtio_mem_bbm_bb_state virtio_mem_bbm_get_bb_state(struct virtio_mem *vm,
+                                                               unsigned long bb_id)
+{
+       return vm->bbm.bb_states[bb_id - vm->bbm.first_bb_id];
+}
+
+/*
+ * Prepare the big block state array for the next big block.
+ */
+static int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm)
+{
+       unsigned long old_bytes = vm->bbm.next_bb_id - vm->bbm.first_bb_id;
+       unsigned long new_bytes = old_bytes + 1;
+       int old_pages = PFN_UP(old_bytes);
+       int new_pages = PFN_UP(new_bytes);
+       uint8_t *new_array;
+
+       if (vm->bbm.bb_states && old_pages == new_pages)
+               return 0;
+
+       new_array = vzalloc(new_pages * PAGE_SIZE);
+       if (!new_array)
+               return -ENOMEM;
+
+       mutex_lock(&vm->hotplug_mutex);
+       if (vm->bbm.bb_states)
+               memcpy(new_array, vm->bbm.bb_states, old_pages * PAGE_SIZE);
+       vfree(vm->bbm.bb_states);
+       vm->bbm.bb_states = new_array;
+       mutex_unlock(&vm->hotplug_mutex);
+
+       return 0;
+}
+
+#define virtio_mem_bbm_for_each_bb(_vm, _bb_id, _state) \
+       for (_bb_id = _vm->bbm.first_bb_id; \
+            _bb_id < _vm->bbm.next_bb_id && _vm->bbm.bb_count[_state]; \
+            _bb_id++) \
+               if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state)
+
+#define virtio_mem_bbm_for_each_bb_rev(_vm, _bb_id, _state) \
+       for (_bb_id = _vm->bbm.next_bb_id - 1; \
+            _bb_id >= _vm->bbm.first_bb_id && _vm->bbm.bb_count[_state]; \
+            _bb_id--) \
+               if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state)
+
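
These per-state iterators stop early: the loop condition rechecks the running count for the requested state, so the walk ends as soon as no more big blocks in that state can exist. A hedged usage sketch, calling a helper defined later in this patch:

        unsigned long bb_id;

        /* visit every big block currently plugged and added to Linux */
        virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
                if (!virtio_mem_bbm_offline_and_remove_bb(vm, bb_id))
                        break;  /* successfully removed one, stop */
        }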
+/*
  * Set the state of a memory block, taking care of the state counter.
  */
-static void virtio_mem_mb_set_state(struct virtio_mem *vm, unsigned long mb_id,
-                                   enum virtio_mem_mb_state state)
+static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm,
+                                       unsigned long mb_id, uint8_t state)
 {
-       const unsigned long idx = mb_id - vm->first_mb_id;
-       enum virtio_mem_mb_state old_state;
+       const unsigned long idx = mb_id - vm->sbm.first_mb_id;
+       uint8_t old_state;
 
-       old_state = vm->mb_state[idx];
-       vm->mb_state[idx] = state;
+       old_state = vm->sbm.mb_states[idx];
+       vm->sbm.mb_states[idx] = state;
 
-       BUG_ON(vm->nb_mb_state[old_state] == 0);
-       vm->nb_mb_state[old_state]--;
-       vm->nb_mb_state[state]++;
+       BUG_ON(vm->sbm.mb_count[old_state] == 0);
+       vm->sbm.mb_count[old_state]--;
+       vm->sbm.mb_count[state]++;
 }
 
 /*
  * Get the state of a memory block.
  */
-static enum virtio_mem_mb_state virtio_mem_mb_get_state(struct virtio_mem *vm,
-                                                       unsigned long mb_id)
+static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm,
+                                          unsigned long mb_id)
 {
-       const unsigned long idx = mb_id - vm->first_mb_id;
+       const unsigned long idx = mb_id - vm->sbm.first_mb_id;
 
-       return vm->mb_state[idx];
+       return vm->sbm.mb_states[idx];
 }
 
 /*
  * Prepare the state array for the next memory block.
  */
-static int virtio_mem_mb_state_prepare_next_mb(struct virtio_mem *vm)
+static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm)
 {
-       unsigned long old_bytes = vm->next_mb_id - vm->first_mb_id + 1;
-       unsigned long new_bytes = vm->next_mb_id - vm->first_mb_id + 2;
-       int old_pages = PFN_UP(old_bytes);
-       int new_pages = PFN_UP(new_bytes);
-       uint8_t *new_mb_state;
+       int old_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id);
+       int new_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id + 1);
+       uint8_t *new_array;
 
-       if (vm->mb_state && old_pages == new_pages)
+       if (vm->sbm.mb_states && old_pages == new_pages)
                return 0;
 
-       new_mb_state = vzalloc(new_pages * PAGE_SIZE);
-       if (!new_mb_state)
+       new_array = vzalloc(new_pages * PAGE_SIZE);
+       if (!new_array)
                return -ENOMEM;
 
        mutex_lock(&vm->hotplug_mutex);
-       if (vm->mb_state)
-               memcpy(new_mb_state, vm->mb_state, old_pages * PAGE_SIZE);
-       vfree(vm->mb_state);
-       vm->mb_state = new_mb_state;
+       if (vm->sbm.mb_states)
+               memcpy(new_array, vm->sbm.mb_states, old_pages * PAGE_SIZE);
+       vfree(vm->sbm.mb_states);
+       vm->sbm.mb_states = new_array;
        mutex_unlock(&vm->hotplug_mutex);
 
        return 0;
 }
 
-#define virtio_mem_for_each_mb_state(_vm, _mb_id, _state) \
-       for (_mb_id = _vm->first_mb_id; \
-            _mb_id < _vm->next_mb_id && _vm->nb_mb_state[_state]; \
+#define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \
+       for (_mb_id = _vm->sbm.first_mb_id; \
+            _mb_id < _vm->sbm.next_mb_id && _vm->sbm.mb_count[_state]; \
             _mb_id++) \
-               if (virtio_mem_mb_get_state(_vm, _mb_id) == _state)
+               if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state)
 
-#define virtio_mem_for_each_mb_state_rev(_vm, _mb_id, _state) \
-       for (_mb_id = _vm->next_mb_id - 1; \
-            _mb_id >= _vm->first_mb_id && _vm->nb_mb_state[_state]; \
+#define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \
+       for (_mb_id = _vm->sbm.next_mb_id - 1; \
+            _mb_id >= _vm->sbm.first_mb_id && _vm->sbm.mb_count[_state]; \
             _mb_id--) \
-               if (virtio_mem_mb_get_state(_vm, _mb_id) == _state)
+               if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state)
+
+/*
+ * Calculate the bit number in the subblock bitmap for the given subblock
+ * inside the given memory block.
+ */
+static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm,
+                                         unsigned long mb_id, int sb_id)
+{
+       return (mb_id - vm->sbm.first_mb_id) * vm->sbm.sbs_per_mb + sb_id;
+}
 
 /*
  * Mark all selected subblocks plugged.
  *
  * Will not modify the state of the memory block.
  */
-static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm,
-                                        unsigned long mb_id, int sb_id,
-                                        int count)
+static void virtio_mem_sbm_set_sb_plugged(struct virtio_mem *vm,
+                                         unsigned long mb_id, int sb_id,
+                                         int count)
 {
-       const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;
+       const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
 
-       __bitmap_set(vm->sb_bitmap, bit, count);
+       __bitmap_set(vm->sbm.sb_states, bit, count);
 }
 
 /*
@@ -311,105 +498,114 @@ static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm,
  *
  * Will not modify the state of the memory block.
  */
-static void virtio_mem_mb_set_sb_unplugged(struct virtio_mem *vm,
-                                          unsigned long mb_id, int sb_id,
-                                          int count)
+static void virtio_mem_sbm_set_sb_unplugged(struct virtio_mem *vm,
+                                           unsigned long mb_id, int sb_id,
+                                           int count)
 {
-       const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;
+       const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
 
-       __bitmap_clear(vm->sb_bitmap, bit, count);
+       __bitmap_clear(vm->sbm.sb_states, bit, count);
 }
 
 /*
  * Test if all selected subblocks are plugged.
  */
-static bool virtio_mem_mb_test_sb_plugged(struct virtio_mem *vm,
-                                         unsigned long mb_id, int sb_id,
-                                         int count)
+static bool virtio_mem_sbm_test_sb_plugged(struct virtio_mem *vm,
+                                          unsigned long mb_id, int sb_id,
+                                          int count)
 {
-       const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;
+       const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
 
        if (count == 1)
-               return test_bit(bit, vm->sb_bitmap);
+               return test_bit(bit, vm->sbm.sb_states);
 
        /* TODO: Helper similar to bitmap_set() */
-       return find_next_zero_bit(vm->sb_bitmap, bit + count, bit) >=
+       return find_next_zero_bit(vm->sbm.sb_states, bit + count, bit) >=
               bit + count;
 }
 
 /*
  * Test if all selected subblocks are unplugged.
  */
-static bool virtio_mem_mb_test_sb_unplugged(struct virtio_mem *vm,
-                                           unsigned long mb_id, int sb_id,
-                                           int count)
+static bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm,
+                                            unsigned long mb_id, int sb_id,
+                                            int count)
 {
-       const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id;
+       const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
 
        /* TODO: Helper similar to bitmap_set() */
-       return find_next_bit(vm->sb_bitmap, bit + count, bit) >= bit + count;
+       return find_next_bit(vm->sbm.sb_states, bit + count, bit) >=
+              bit + count;
 }
 
 /*
- * Find the first unplugged subblock. Returns vm->nb_sb_per_mb in case there is
+ * Find the first unplugged subblock. Returns vm->sbm.sbs_per_mb in case there is
  * none.
  */
-static int virtio_mem_mb_first_unplugged_sb(struct virtio_mem *vm,
+static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm,
                                            unsigned long mb_id)
 {
-       const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb;
+       const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0);
 
-       return find_next_zero_bit(vm->sb_bitmap, bit + vm->nb_sb_per_mb, bit) -
-              bit;
+       return find_next_zero_bit(vm->sbm.sb_states,
+                                 bit + vm->sbm.sbs_per_mb, bit) - bit;
 }
 
 /*
  * Prepare the subblock bitmap for the next memory block.
  */
-static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm)
+static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm)
 {
-       const unsigned long old_nb_mb = vm->next_mb_id - vm->first_mb_id;
-       const unsigned long old_nb_bits = old_nb_mb * vm->nb_sb_per_mb;
-       const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->nb_sb_per_mb;
+       const unsigned long old_nb_mb = vm->sbm.next_mb_id - vm->sbm.first_mb_id;
+       const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb;
+       const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb;
        int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long));
        int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long));
-       unsigned long *new_sb_bitmap, *old_sb_bitmap;
+       unsigned long *new_bitmap, *old_bitmap;
 
-       if (vm->sb_bitmap && old_pages == new_pages)
+       if (vm->sbm.sb_states && old_pages == new_pages)
                return 0;
 
-       new_sb_bitmap = vzalloc(new_pages * PAGE_SIZE);
-       if (!new_sb_bitmap)
+       new_bitmap = vzalloc(new_pages * PAGE_SIZE);
+       if (!new_bitmap)
                return -ENOMEM;
 
        mutex_lock(&vm->hotplug_mutex);
-       if (new_sb_bitmap)
-               memcpy(new_sb_bitmap, vm->sb_bitmap, old_pages * PAGE_SIZE);
+       if (new_bitmap)
+               memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE);
 
-       old_sb_bitmap = vm->sb_bitmap;
-       vm->sb_bitmap = new_sb_bitmap;
+       old_bitmap = vm->sbm.sb_states;
+       vm->sbm.sb_states = new_bitmap;
        mutex_unlock(&vm->hotplug_mutex);
 
-       vfree(old_sb_bitmap);
+       vfree(old_bitmap);
        return 0;
 }
 
 /*
- * Try to add a memory block to Linux. This will usually only fail
- * if out of memory.
+ * Test if we could add memory without creating too much offline memory -
+ * to avoid running OOM if onlining of memory is deferred.
+ */
+static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size)
+{
+       if (WARN_ON_ONCE(size > vm->offline_threshold))
+               return false;
+
+       return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold;
+}
+
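
To make the threshold concrete: assuming the default 1 GiB VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD and 128 MiB Linux memory blocks (typical on x86-64), at most eight blocks may sit offline before plugging pauses; every MEM_ONLINE notification subtracts from offline_size and can let it resume. A worked check under those assumptions:

        uint64_t size = memory_block_size_bytes();      /* 128 MiB here */

        atomic64_set(&vm->offline_size, 7 * size);      /* 896 MiB offline */
        virtio_mem_could_add_memory(vm, size);          /* true: 1 GiB <= 1 GiB */

        atomic64_set(&vm->offline_size, 8 * size);      /* 1 GiB offline */
        virtio_mem_could_add_memory(vm, size);          /* false: would exceed */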
+/*
+ * Try adding memory to Linux. Will usually only fail if out of memory.
  *
  * Must not be called with the vm->hotplug_mutex held (possible deadlock with
  * onlining code).
  *
- * Will not modify the state of the memory block.
+ * Will not modify the state of memory blocks in virtio-mem.
  */
-static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id)
+static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr,
+                                uint64_t size)
 {
-       const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
-       int nid = vm->nid;
-
-       if (nid == NUMA_NO_NODE)
-               nid = memory_add_physaddr_to_nid(addr);
+       int rc;
 
        /*
         * When force-unloading the driver and we still have memory added to
@@ -422,53 +618,155 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id)
                        return -ENOMEM;
        }
 
-       dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id);
-       return add_memory_driver_managed(nid, addr, memory_block_size_bytes(),
-                                        vm->resource_name,
-                                        MEMHP_MERGE_RESOURCE);
+       dev_dbg(&vm->vdev->dev, "adding memory: 0x%llx - 0x%llx\n", addr,
+               addr + size - 1);
+       /* Memory might get onlined immediately. */
+       atomic64_add(size, &vm->offline_size);
+       rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name,
+                                      MEMHP_MERGE_RESOURCE);
+       if (rc) {
+               atomic64_sub(size, &vm->offline_size);
+               dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc);
+               /*
+                * TODO: Linux MM does not properly clean up yet in all cases
+                * where adding of memory failed - especially on -ENOMEM.
+                */
+       }
+       return rc;
+}
+
+/*
+ * See virtio_mem_add_memory(): Try adding a single Linux memory block.
+ */
+static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id)
+{
+       const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
+       const uint64_t size = memory_block_size_bytes();
+
+       return virtio_mem_add_memory(vm, addr, size);
+}
+
+/*
+ * See virtio_mem_add_memory(): Try adding a big block.
+ */
+static int virtio_mem_bbm_add_bb(struct virtio_mem *vm, unsigned long bb_id)
+{
+       const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
+       const uint64_t size = vm->bbm.bb_size;
+
+       return virtio_mem_add_memory(vm, addr, size);
 }
 
 /*
- * Try to remove a memory block from Linux. Will only fail if the memory block
- * is not offline.
+ * Try removing memory from Linux. Will only fail if memory blocks aren't
+ * offline.
  *
  * Must not be called with the vm->hotplug_mutex held (possible deadlock with
  * onlining code).
  *
- * Will not modify the state of the memory block.
+ * Will not modify the state of memory blocks in virtio-mem.
+ */
+static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr,
+                                   uint64_t size)
+{
+       int rc;
+
+       dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr,
+               addr + size - 1);
+       rc = remove_memory(vm->nid, addr, size);
+       if (!rc) {
+               atomic64_sub(size, &vm->offline_size);
+               /*
+                * We might have freed up memory we can now unplug, retry
+                * immediately instead of waiting.
+                */
+               virtio_mem_retry(vm);
+       } else {
+               dev_dbg(&vm->vdev->dev, "removing memory failed: %d\n", rc);
+       }
+       return rc;
+}
+
+/*
+ * See virtio_mem_remove_memory(): Try removing a single Linux memory block.
  */
-static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id)
+static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id)
 {
        const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
-       int nid = vm->nid;
+       const uint64_t size = memory_block_size_bytes();
 
-       if (nid == NUMA_NO_NODE)
-               nid = memory_add_physaddr_to_nid(addr);
+       return virtio_mem_remove_memory(vm, addr, size);
+}
+
+/*
+ * See virtio_mem_remove_memory(): Try to remove all Linux memory blocks covered
+ * by the big block.
+ */
+static int virtio_mem_bbm_remove_bb(struct virtio_mem *vm, unsigned long bb_id)
+{
+       const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
+       const uint64_t size = vm->bbm.bb_size;
 
-       dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id);
-       return remove_memory(nid, addr, memory_block_size_bytes());
+       return virtio_mem_remove_memory(vm, addr, size);
 }
 
 /*
- * Try to offline and remove a memory block from Linux.
+ * Try offlining and removing memory from Linux.
  *
  * Must not be called with the vm->hotplug_mutex held (possible deadlock with
  * onlining code).
  *
- * Will not modify the state of the memory block.
+ * Will not modify the state of memory blocks in virtio-mem.
  */
-static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm,
-                                           unsigned long mb_id)
+static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm,
+                                               uint64_t addr,
+                                               uint64_t size)
+{
+       int rc;
+
+       dev_dbg(&vm->vdev->dev,
+               "offlining and removing memory: 0x%llx - 0x%llx\n", addr,
+               addr + size - 1);
+
+       rc = offline_and_remove_memory(vm->nid, addr, size);
+       if (!rc) {
+               atomic64_sub(size, &vm->offline_size);
+               /*
+                * We might have freed up memory we can now unplug, retry
+                * immediately instead of waiting.
+                */
+               virtio_mem_retry(vm);
+       } else {
+               dev_dbg(&vm->vdev->dev,
+                       "offlining and removing memory failed: %d\n", rc);
+       }
+       return rc;
+}
+
+/*
+ * See virtio_mem_offline_and_remove_memory(): Try offlining and removing
+ * a single Linux memory block.
+ */
+static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm,
+                                               unsigned long mb_id)
 {
        const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id);
-       int nid = vm->nid;
+       const uint64_t size = memory_block_size_bytes();
 
-       if (nid == NUMA_NO_NODE)
-               nid = memory_add_physaddr_to_nid(addr);
+       return virtio_mem_offline_and_remove_memory(vm, addr, size);
+}
+
+/*
+ * See virtio_mem_offline_and_remove_memory(): Try to offline and remove
+ * all Linux memory blocks covered by the big block.
+ */
+static int virtio_mem_bbm_offline_and_remove_bb(struct virtio_mem *vm,
+                                               unsigned long bb_id)
+{
+       const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
+       const uint64_t size = vm->bbm.bb_size;
 
-       dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n",
-               mb_id);
-       return offline_and_remove_memory(nid, addr, memory_block_size_bytes());
+       return virtio_mem_offline_and_remove_memory(vm, addr, size);
 }
 
 /*
@@ -499,31 +797,28 @@ static int virtio_mem_translate_node_id(struct virtio_mem *vm, uint16_t node_id)
  * Test if a virtio-mem device overlaps with the given range. Can be called
  * from (notifier) callbacks lockless.
  */
-static bool virtio_mem_overlaps_range(struct virtio_mem *vm,
-                                     unsigned long start, unsigned long size)
+static bool virtio_mem_overlaps_range(struct virtio_mem *vm, uint64_t start,
+                                     uint64_t size)
 {
-       unsigned long dev_start = virtio_mem_mb_id_to_phys(vm->first_mb_id);
-       unsigned long dev_end = virtio_mem_mb_id_to_phys(vm->last_mb_id) +
-                               memory_block_size_bytes();
-
-       return start < dev_end && dev_start < start + size;
+       return start < vm->addr + vm->region_size && vm->addr < start + size;
 }
 
 /*
- * Test if a virtio-mem device owns a memory block. Can be called from
+ * Test if a virtio-mem device contains a given range. Can be called from
  * (notifier) callbacks lockless.
  */
-static bool virtio_mem_owned_mb(struct virtio_mem *vm, unsigned long mb_id)
+static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start,
+                                     uint64_t size)
 {
-       return mb_id >= vm->first_mb_id && mb_id <= vm->last_mb_id;
+       return start >= vm->addr && start + size <= vm->addr + vm->region_size;
 }
 
-static int virtio_mem_notify_going_online(struct virtio_mem *vm,
-                                         unsigned long mb_id)
+static int virtio_mem_sbm_notify_going_online(struct virtio_mem *vm,
+                                             unsigned long mb_id)
 {
-       switch (virtio_mem_mb_get_state(vm, mb_id)) {
-       case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
-       case VIRTIO_MEM_MB_STATE_OFFLINE:
+       switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
+       case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
+       case VIRTIO_MEM_SBM_MB_OFFLINE:
                return NOTIFY_OK;
        default:
                break;
@@ -533,108 +828,100 @@ static int virtio_mem_notify_going_online(struct virtio_mem *vm,
        return NOTIFY_BAD;
 }
 
-static void virtio_mem_notify_offline(struct virtio_mem *vm,
-                                     unsigned long mb_id)
+static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm,
+                                         unsigned long mb_id)
 {
-       switch (virtio_mem_mb_get_state(vm, mb_id)) {
-       case VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL:
-               virtio_mem_mb_set_state(vm, mb_id,
-                                       VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
+       switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
+       case VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL:
+               virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                           VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
                break;
-       case VIRTIO_MEM_MB_STATE_ONLINE:
-               virtio_mem_mb_set_state(vm, mb_id,
-                                       VIRTIO_MEM_MB_STATE_OFFLINE);
+       case VIRTIO_MEM_SBM_MB_ONLINE:
+               virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                           VIRTIO_MEM_SBM_MB_OFFLINE);
                break;
        default:
                BUG();
                break;
        }
-
-       /*
-        * Trigger the workqueue, maybe we can now unplug memory. Also,
-        * when we offline and remove a memory block, this will re-trigger
-        * us immediately - which is often nice because the removal of
-        * the memory block (e.g., memmap) might have freed up memory
-        * on other memory blocks we manage.
-        */
-       virtio_mem_retry(vm);
 }
 
-static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id)
+static void virtio_mem_sbm_notify_online(struct virtio_mem *vm,
+                                        unsigned long mb_id)
 {
-       unsigned long nb_offline;
-
-       switch (virtio_mem_mb_get_state(vm, mb_id)) {
-       case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
-               virtio_mem_mb_set_state(vm, mb_id,
-                                       VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
+       switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
+       case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
+               virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                       VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL);
                break;
-       case VIRTIO_MEM_MB_STATE_OFFLINE:
-               virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_ONLINE);
+       case VIRTIO_MEM_SBM_MB_OFFLINE:
+               virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                           VIRTIO_MEM_SBM_MB_ONLINE);
                break;
        default:
                BUG();
                break;
        }
-       nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] +
-                    vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL];
-
-       /* see if we can add new blocks now that we onlined one block */
-       if (nb_offline == VIRTIO_MEM_NB_OFFLINE_THRESHOLD - 1)
-               virtio_mem_retry(vm);
 }
 
-static void virtio_mem_notify_going_offline(struct virtio_mem *vm,
-                                           unsigned long mb_id)
+static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm,
+                                               unsigned long mb_id)
 {
-       const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
-       struct page *page;
+       const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size);
        unsigned long pfn;
-       int sb_id, i;
+       int sb_id;
 
-       for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) {
-               if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+       for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) {
+               if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
                        continue;
-               /*
-                * Drop our reference to the pages so the memory can get
-                * offlined and add the unplugged pages to the managed
-                * page counters (so offlining code can correctly subtract
-                * them again).
-                */
                pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
-                              sb_id * vm->subblock_size);
-               adjust_managed_page_count(pfn_to_page(pfn), nr_pages);
-               for (i = 0; i < nr_pages; i++) {
-                       page = pfn_to_page(pfn + i);
-                       if (WARN_ON(!page_ref_dec_and_test(page)))
-                               dump_page(page, "unplugged page referenced");
-               }
+                              sb_id * vm->sbm.sb_size);
+               virtio_mem_fake_offline_going_offline(pfn, nr_pages);
        }
 }
 
-static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm,
-                                            unsigned long mb_id)
+static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm,
+                                                unsigned long mb_id)
 {
-       const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
+       const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size);
        unsigned long pfn;
-       int sb_id, i;
+       int sb_id;
 
-       for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) {
-               if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+       for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) {
+               if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
                        continue;
-               /*
-                * Get the reference we dropped when going offline and
-                * subtract the unplugged pages from the managed page
-                * counters.
-                */
                pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
-                              sb_id * vm->subblock_size);
-               adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
-               for (i = 0; i < nr_pages; i++)
-                       page_ref_inc(pfn_to_page(pfn + i));
+                              sb_id * vm->sbm.sb_size);
+               virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
        }
 }
 
+static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm,
+                                               unsigned long bb_id,
+                                               unsigned long pfn,
+                                               unsigned long nr_pages)
+{
+       /*
+        * When marked as "fake-offline", all online memory of this device block
+        * is allocated by us. Otherwise, we don't have any memory allocated.
+        */
+       if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+           VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
+               return;
+       virtio_mem_fake_offline_going_offline(pfn, nr_pages);
+}
+
+static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm,
+                                                unsigned long bb_id,
+                                                unsigned long pfn,
+                                                unsigned long nr_pages)
+{
+       if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+           VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
+               return;
+       virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
+}
+
 /*
  * This callback will either be called synchronously from add_memory() or
  * asynchronously (e.g., triggered via user space). We have to be careful
@@ -648,20 +935,33 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
        struct memory_notify *mhp = arg;
        const unsigned long start = PFN_PHYS(mhp->start_pfn);
        const unsigned long size = PFN_PHYS(mhp->nr_pages);
-       const unsigned long mb_id = virtio_mem_phys_to_mb_id(start);
        int rc = NOTIFY_OK;
+       unsigned long id;
 
        if (!virtio_mem_overlaps_range(vm, start, size))
                return NOTIFY_DONE;
 
-       /*
-        * Memory is onlined/offlined in memory block granularity. We cannot
-        * cross virtio-mem device boundaries and memory block boundaries. Bail
-        * out if this ever changes.
-        */
-       if (WARN_ON_ONCE(size != memory_block_size_bytes() ||
-                        !IS_ALIGNED(start, memory_block_size_bytes())))
-               return NOTIFY_BAD;
+       if (vm->in_sbm) {
+               id = virtio_mem_phys_to_mb_id(start);
+               /*
+                * In SBM, we add memory in separate memory blocks - we expect
+                * it to be onlined/offlined in the same granularity. Bail out
+                * if this ever changes.
+                */
+               if (WARN_ON_ONCE(size != memory_block_size_bytes() ||
+                                !IS_ALIGNED(start, memory_block_size_bytes())))
+                       return NOTIFY_BAD;
+       } else {
+               id = virtio_mem_phys_to_bb_id(vm, start);
+               /*
+                * In BBM, we only care about onlining/offlining happening
+                * within a single big block. We don't care about the
+                * actual granularity as we don't track individual Linux
+                * memory blocks.
+                */
+               if (WARN_ON_ONCE(id != virtio_mem_phys_to_bb_id(vm, start + size - 1)))
+                       return NOTIFY_BAD;
+       }
 
        /*
         * Avoid circular locking lockdep warnings. We lock the mutex
@@ -680,7 +980,12 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
                        break;
                }
                vm->hotplug_active = true;
-               virtio_mem_notify_going_offline(vm, mb_id);
+               if (vm->in_sbm)
+                       virtio_mem_sbm_notify_going_offline(vm, id);
+               else
+                       virtio_mem_bbm_notify_going_offline(vm, id,
+                                                           mhp->start_pfn,
+                                                           mhp->nr_pages);
                break;
        case MEM_GOING_ONLINE:
                mutex_lock(&vm->hotplug_mutex);
@@ -690,22 +995,51 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
                        break;
                }
                vm->hotplug_active = true;
-               rc = virtio_mem_notify_going_online(vm, mb_id);
+               if (vm->in_sbm)
+                       rc = virtio_mem_sbm_notify_going_online(vm, id);
                break;
        case MEM_OFFLINE:
-               virtio_mem_notify_offline(vm, mb_id);
+               if (vm->in_sbm)
+                       virtio_mem_sbm_notify_offline(vm, id);
+
+               atomic64_add(size, &vm->offline_size);
+               /*
+                * Trigger the workqueue. Now that we have some offline memory,
+                * maybe we can handle pending unplug requests.
+                */
+               if (!unplug_online)
+                       virtio_mem_retry(vm);
+
                vm->hotplug_active = false;
                mutex_unlock(&vm->hotplug_mutex);
                break;
        case MEM_ONLINE:
-               virtio_mem_notify_online(vm, mb_id);
+               if (vm->in_sbm)
+                       virtio_mem_sbm_notify_online(vm, id);
+
+               atomic64_sub(size, &vm->offline_size);
+               /*
+                * Start adding more memory once we onlined half of our
+                * threshold. Don't trigger if it's possibly due to our action
+                * (e.g., us adding memory which gets onlined immediately from
+                * the core).
+                */
+               if (!atomic_read(&vm->wq_active) &&
+                   virtio_mem_could_add_memory(vm, vm->offline_threshold / 2))
+                       virtio_mem_retry(vm);
+
                vm->hotplug_active = false;
                mutex_unlock(&vm->hotplug_mutex);
                break;
        case MEM_CANCEL_OFFLINE:
                if (!vm->hotplug_active)
                        break;
-               virtio_mem_notify_cancel_offline(vm, mb_id);
+               if (vm->in_sbm)
+                       virtio_mem_sbm_notify_cancel_offline(vm, id);
+               else
+                       virtio_mem_bbm_notify_cancel_offline(vm, id,
+                                                            mhp->start_pfn,
+                                                            mhp->nr_pages);
                vm->hotplug_active = false;
                mutex_unlock(&vm->hotplug_mutex);
                break;
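
Reviewer note: the SBM/BBM id derivation in the notifier above is plain
address arithmetic. A minimal sketch under assumed power-of-two sizes
(helper names here are hypothetical; the real helpers are defined earlier
in virtio_mem.c, outside this hunk):

	static unsigned long sketch_phys_to_mb_id(uint64_t addr)
	{
		/* SBM: one id per Linux memory block. */
		return addr / memory_block_size_bytes();
	}

	static unsigned long sketch_phys_to_bb_id(struct virtio_mem *vm,
						  uint64_t addr)
	{
		/* BBM: one id per big block of vm->bbm.bb_size bytes. */
		return addr / vm->bbm.bb_size;
	}
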
@@ -729,7 +1063,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
  * (via generic_online_page()) using PageDirty().
  */
 static void virtio_mem_set_fake_offline(unsigned long pfn,
-                                       unsigned int nr_pages, bool onlined)
+                                       unsigned long nr_pages, bool onlined)
 {
        for (; nr_pages--; pfn++) {
                struct page *page = pfn_to_page(pfn);
@@ -748,7 +1082,7 @@ static void virtio_mem_set_fake_offline(unsigned long pfn,
  * (via generic_online_page()), clear PageDirty().
  */
 static void virtio_mem_clear_fake_offline(unsigned long pfn,
-                                         unsigned int nr_pages, bool onlined)
+                                         unsigned long nr_pages, bool onlined)
 {
        for (; nr_pages--; pfn++) {
                struct page *page = pfn_to_page(pfn);
@@ -763,16 +1097,17 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn,
  * Release a range of fake-offline pages to the buddy, effectively
  * fake-onlining them.
  */
-static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages)
+static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
 {
-       const int order = MAX_ORDER - 1;
-       int i;
+       const unsigned long max_nr_pages = MAX_ORDER_NR_PAGES;
+       unsigned long i;
 
        /*
-        * We are always called with subblock granularity, which is at least
-        * aligned to MAX_ORDER - 1.
+        * We are always called with at least MAX_ORDER_NR_PAGES
+        * granularity/alignment (e.g., the way subblocks work). All pages
+        * inside such a block are alike.
         */
-       for (i = 0; i < nr_pages; i += 1 << order) {
+       for (i = 0; i < nr_pages; i += max_nr_pages) {
                struct page *page = pfn_to_page(pfn + i);
 
                /*
@@ -782,42 +1117,128 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages)
                 * alike.
                 */
                if (PageDirty(page)) {
-                       virtio_mem_clear_fake_offline(pfn + i, 1 << order,
+                       virtio_mem_clear_fake_offline(pfn + i, max_nr_pages,
                                                      false);
-                       generic_online_page(page, order);
+                       generic_online_page(page, MAX_ORDER - 1);
                } else {
-                       virtio_mem_clear_fake_offline(pfn + i, 1 << order,
+                       virtio_mem_clear_fake_offline(pfn + i, max_nr_pages,
                                                      true);
-                       free_contig_range(pfn + i, 1 << order);
-                       adjust_managed_page_count(page, 1 << order);
+                       free_contig_range(pfn + i, max_nr_pages);
+                       adjust_managed_page_count(page, max_nr_pages);
                }
        }
 }
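
Reviewer note: a quick sanity check of the chunk size used above, as a
self-contained userspace sketch (assumed x86-64 defaults: 4 KiB pages,
MAX_ORDER = 11, so MAX_ORDER_NR_PAGES = 1 << (MAX_ORDER - 1)):

	#include <stdio.h>

	int main(void)
	{
		const int max_order = 11;      /* assumed default */
		const long page_size = 4096;   /* assumed 4 KiB pages */
		const long max_nr_pages = 1L << (max_order - 1);

		/* 1024 pages x 4 KiB = 4 MiB chunks of uniform state */
		printf("%ld pages = %ld MiB\n", max_nr_pages,
		       max_nr_pages * page_size >> 20);
		return 0;
	}

On such a configuration, virtio_mem_fake_online() walks memory in 4 MiB
steps, and every page within a step shares the same fake-offline variant.
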
 
+/*
+ * Try to allocate a range, marking pages fake-offline, effectively
+ * fake-offlining them.
+ */
+static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
+{
+       const bool is_movable = zone_idx(page_zone(pfn_to_page(pfn))) ==
+                               ZONE_MOVABLE;
+       int rc, retry_count;
+
+       /*
+        * TODO: We want an alloc_contig_range() mode that tries to allocate
+        * harder (e.g., dealing with temporarily pinned pages, PCP), especially
+        * with ZONE_MOVABLE. So for now, retry a couple of times with
+        * ZONE_MOVABLE before giving up - because that zone is supposed to give
+        * some guarantees.
+        */
+       for (retry_count = 0; retry_count < 5; retry_count++) {
+               rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE,
+                                       GFP_KERNEL);
+               if (rc == -ENOMEM)
+                       /* whoops, out of memory */
+                       return rc;
+               else if (rc && !is_movable)
+                       break;
+               else if (rc)
+                       continue;
+
+               virtio_mem_set_fake_offline(pfn, nr_pages, true);
+               adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
+               return 0;
+       }
+
+       return -EBUSY;
+}
+
+/*
+ * Handle fake-offline pages when memory is going offline - such that the
+ * pages can be skipped by mm-core when offlining.
+ */
+static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
+                                                 unsigned long nr_pages)
+{
+       struct page *page;
+       unsigned long i;
+
+       /*
+        * Drop our reference to the pages so the memory can get offlined
+        * and add the unplugged pages to the managed page counters (so
+        * offlining code can correctly subtract them again).
+        */
+       adjust_managed_page_count(pfn_to_page(pfn), nr_pages);
+       for (i = 0; i < nr_pages; i++) {
+               page = pfn_to_page(pfn + i);
+               if (WARN_ON(!page_ref_dec_and_test(page)))
+                       dump_page(page, "fake-offline page referenced");
+       }
+}
+
+/*
+ * Handle fake-offline pages when memory offlining is canceled - to undo
+ * what we did in virtio_mem_fake_offline_going_offline().
+ */
+static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
+                                                  unsigned long nr_pages)
+{
+       unsigned long i;
+
+       /*
+        * Get the reference we dropped when going offline and subtract the
+        * unplugged pages from the managed page counters.
+        */
+       adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
+       for (i = 0; i < nr_pages; i++)
+               page_ref_inc(pfn_to_page(pfn + i));
+}
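
Reviewer note: the two helpers above are strict inverses. While a page is
fake-offline in an online block, the driver holds exactly one reference to
it and has subtracted it from the managed page counters. A hypothetical
assertion sketch of that invariant (not part of the patch):

	/* Sketch: invariant for a fake-offline page in an online block. */
	static void sketch_check_fake_offline_page(struct page *page)
	{
		WARN_ON(!PageOffline(page));
		WARN_ON(page_ref_count(page) != 1);
	}
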
+
 static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
 {
        const unsigned long addr = page_to_phys(page);
-       const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr);
+       unsigned long id, sb_id;
        struct virtio_mem *vm;
-       int sb_id;
+       bool do_online;
 
-       /*
-        * We exploit here that subblocks have at least MAX_ORDER - 1
-        * size/alignment and that this callback is is called with such a
-        * size/alignment. So we cannot cross subblocks and therefore
-        * also not memory blocks.
-        */
        rcu_read_lock();
        list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
-               if (!virtio_mem_owned_mb(vm, mb_id))
+               if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order)))
                        continue;
 
-               sb_id = virtio_mem_phys_to_sb_id(vm, addr);
-               /*
-                * If plugged, online the pages, otherwise, set them fake
-                * offline (PageOffline).
-                */
-               if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+               if (vm->in_sbm) {
+                       /*
+                        * We exploit here that subblocks have at least
+                        * MAX_ORDER_NR_PAGES size/alignment - so we cannot
+                        * cross subblocks within one call.
+                        */
+                       id = virtio_mem_phys_to_mb_id(addr);
+                       sb_id = virtio_mem_phys_to_sb_id(vm, addr);
+                       do_online = virtio_mem_sbm_test_sb_plugged(vm, id,
+                                                                  sb_id, 1);
+               } else {
+                       /*
+                        * If the whole block is marked fake offline, keep
+                        * everything that way.
+                        */
+                       id = virtio_mem_phys_to_bb_id(vm, addr);
+                       do_online = virtio_mem_bbm_get_bb_state(vm, id) !=
+                                   VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
+               }
+               if (do_online)
                        generic_online_page(page, order);
                else
                        virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
@@ -870,23 +1291,33 @@ static int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr,
                .u.plug.addr = cpu_to_virtio64(vm->vdev, addr),
                .u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
        };
+       int rc = -ENOMEM;
 
        if (atomic_read(&vm->config_changed))
                return -EAGAIN;
 
+       dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr,
+               addr + size - 1);
+
        switch (virtio_mem_send_request(vm, &req)) {
        case VIRTIO_MEM_RESP_ACK:
                vm->plugged_size += size;
                return 0;
        case VIRTIO_MEM_RESP_NACK:
-               return -EAGAIN;
+               rc = -EAGAIN;
+               break;
        case VIRTIO_MEM_RESP_BUSY:
-               return -ETXTBSY;
+               rc = -ETXTBSY;
+               break;
        case VIRTIO_MEM_RESP_ERROR:
-               return -EINVAL;
+               rc = -EINVAL;
+               break;
        default:
-               return -ENOMEM;
+               break;
        }
+
+       dev_dbg(&vm->vdev->dev, "plugging memory failed: %d\n", rc);
+       return rc;
 }
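
Reviewer note: the plug path's response handling now collapses to a small
mapping plus one debug print on failure. The same mapping as a standalone
sketch (hypothetical helper name):

	/* Sketch: virtio-mem response code to errno, plug path. */
	static int sketch_resp_to_errno(uint16_t resp)
	{
		switch (resp) {
		case VIRTIO_MEM_RESP_ACK:
			return 0;
		case VIRTIO_MEM_RESP_NACK:
			return -EAGAIN;   /* e.g., config changed, retry */
		case VIRTIO_MEM_RESP_BUSY:
			return -ETXTBSY;  /* hypervisor busy, retry later */
		case VIRTIO_MEM_RESP_ERROR:
			return -EINVAL;
		default:
			return -ENOMEM;   /* no/unknown response */
		}
	}
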
 
 static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr,
@@ -898,21 +1329,30 @@ static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr,
                .u.unplug.addr = cpu_to_virtio64(vm->vdev, addr),
                .u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
        };
+       int rc = -ENOMEM;
 
        if (atomic_read(&vm->config_changed))
                return -EAGAIN;
 
+       dev_dbg(&vm->vdev->dev, "unplugging memory: 0x%llx - 0x%llx\n", addr,
+               addr + size - 1);
+
        switch (virtio_mem_send_request(vm, &req)) {
        case VIRTIO_MEM_RESP_ACK:
                vm->plugged_size -= size;
                return 0;
        case VIRTIO_MEM_RESP_BUSY:
-               return -ETXTBSY;
+               rc = -ETXTBSY;
+               break;
        case VIRTIO_MEM_RESP_ERROR:
-               return -EINVAL;
+               rc = -EINVAL;
+               break;
        default:
-               return -ENOMEM;
+               break;
        }
+
+       dev_dbg(&vm->vdev->dev, "unplugging memory failed: %d\n", rc);
+       return rc;
 }
 
 static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm)
@@ -920,6 +1360,9 @@ static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm)
        const struct virtio_mem_req req = {
                .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL),
        };
+       int rc = -ENOMEM;
+
+       dev_dbg(&vm->vdev->dev, "unplugging all memory");
 
        switch (virtio_mem_send_request(vm, &req)) {
        case VIRTIO_MEM_RESP_ACK:
@@ -929,30 +1372,31 @@ static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm)
                atomic_set(&vm->config_changed, 1);
                return 0;
        case VIRTIO_MEM_RESP_BUSY:
-               return -ETXTBSY;
+               rc = -ETXTBSY;
+               break;
        default:
-               return -ENOMEM;
+               break;
        }
+
+       dev_dbg(&vm->vdev->dev, "unplugging all memory failed: %d\n", rc);
+       return rc;
 }
 
 /*
  * Plug selected subblocks. Updates the plugged state, but not the state
  * of the memory block.
  */
-static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id,
-                                int sb_id, int count)
+static int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id,
+                                 int sb_id, int count)
 {
        const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
-                             sb_id * vm->subblock_size;
-       const uint64_t size = count * vm->subblock_size;
+                             sb_id * vm->sbm.sb_size;
+       const uint64_t size = count * vm->sbm.sb_size;
        int rc;
 
-       dev_dbg(&vm->vdev->dev, "plugging memory block: %lu : %i - %i\n", mb_id,
-               sb_id, sb_id + count - 1);
-
        rc = virtio_mem_send_plug_request(vm, addr, size);
        if (!rc)
-               virtio_mem_mb_set_sb_plugged(vm, mb_id, sb_id, count);
+               virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count);
        return rc;
 }
 
@@ -960,24 +1404,47 @@ static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id,
  * Unplug selected subblocks. Updates the plugged state, but not the state
  * of the memory block.
  */
-static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id,
-                                  int sb_id, int count)
+static int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id,
+                                   int sb_id, int count)
 {
        const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
-                             sb_id * vm->subblock_size;
-       const uint64_t size = count * vm->subblock_size;
+                             sb_id * vm->sbm.sb_size;
+       const uint64_t size = count * vm->sbm.sb_size;
        int rc;
 
-       dev_dbg(&vm->vdev->dev, "unplugging memory block: %lu : %i - %i\n",
-               mb_id, sb_id, sb_id + count - 1);
-
        rc = virtio_mem_send_unplug_request(vm, addr, size);
        if (!rc)
-               virtio_mem_mb_set_sb_unplugged(vm, mb_id, sb_id, count);
+               virtio_mem_sbm_set_sb_unplugged(vm, mb_id, sb_id, count);
        return rc;
 }
 
 /*
+ * Request to unplug a big block.
+ *
+ * Will not modify the state of the big block.
+ */
+static int virtio_mem_bbm_unplug_bb(struct virtio_mem *vm, unsigned long bb_id)
+{
+       const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
+       const uint64_t size = vm->bbm.bb_size;
+
+       return virtio_mem_send_unplug_request(vm, addr, size);
+}
+
+/*
+ * Request to plug a big block.
+ *
+ * Will not modify the state of the big block.
+ */
+static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id)
+{
+       const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
+       const uint64_t size = vm->bbm.bb_size;
+
+       return virtio_mem_send_plug_request(vm, addr, size);
+}
+
+/*
 * Unplug the desired number of plugged subblocks of an offline or not-added
  * memory block. Will fail if any subblock cannot get unplugged (instead of
  * skipping it).
@@ -986,29 +1453,29 @@ static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id,
  *
  * Note: can fail after some subblocks were unplugged.
  */
-static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm,
-                                      unsigned long mb_id, uint64_t *nb_sb)
+static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm,
+                                       unsigned long mb_id, uint64_t *nb_sb)
 {
        int sb_id, count;
        int rc;
 
-       sb_id = vm->nb_sb_per_mb - 1;
+       sb_id = vm->sbm.sbs_per_mb - 1;
        while (*nb_sb) {
                /* Find the next candidate subblock */
                while (sb_id >= 0 &&
-                      virtio_mem_mb_test_sb_unplugged(vm, mb_id, sb_id, 1))
+                      virtio_mem_sbm_test_sb_unplugged(vm, mb_id, sb_id, 1))
                        sb_id--;
                if (sb_id < 0)
                        break;
                /* Try to unplug multiple subblocks at a time */
                count = 1;
                while (count < *nb_sb && sb_id > 0 &&
-                      virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) {
+                      virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) {
                        count++;
                        sb_id--;
                }
 
-               rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count);
+               rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count);
                if (rc)
                        return rc;
                *nb_sb -= count;
@@ -1025,63 +1492,50 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm,
  *
  * Note: can fail after some subblocks were unplugged.
  */
-static int virtio_mem_mb_unplug(struct virtio_mem *vm, unsigned long mb_id)
+static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id)
 {
-       uint64_t nb_sb = vm->nb_sb_per_mb;
+       uint64_t nb_sb = vm->sbm.sbs_per_mb;
 
-       return virtio_mem_mb_unplug_any_sb(vm, mb_id, &nb_sb);
+       return virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb);
 }
 
 /*
  * Prepare tracking data for the next memory block.
  */
-static int virtio_mem_prepare_next_mb(struct virtio_mem *vm,
-                                     unsigned long *mb_id)
+static int virtio_mem_sbm_prepare_next_mb(struct virtio_mem *vm,
+                                         unsigned long *mb_id)
 {
        int rc;
 
-       if (vm->next_mb_id > vm->last_usable_mb_id)
+       if (vm->sbm.next_mb_id > vm->sbm.last_usable_mb_id)
                return -ENOSPC;
 
        /* Resize the state array if required. */
-       rc = virtio_mem_mb_state_prepare_next_mb(vm);
+       rc = virtio_mem_sbm_mb_states_prepare_next_mb(vm);
        if (rc)
                return rc;
 
        /* Resize the subblock bitmap if required. */
-       rc = virtio_mem_sb_bitmap_prepare_next_mb(vm);
+       rc = virtio_mem_sbm_sb_states_prepare_next_mb(vm);
        if (rc)
                return rc;
 
-       vm->nb_mb_state[VIRTIO_MEM_MB_STATE_UNUSED]++;
-       *mb_id = vm->next_mb_id++;
+       vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++;
+       *mb_id = vm->sbm.next_mb_id++;
        return 0;
 }
 
 /*
- * Don't add too many blocks that are not onlined yet to avoid running OOM.
- */
-static bool virtio_mem_too_many_mb_offline(struct virtio_mem *vm)
-{
-       unsigned long nb_offline;
-
-       nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] +
-                    vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL];
-       return nb_offline >= VIRTIO_MEM_NB_OFFLINE_THRESHOLD;
-}
-
-/*
  * Try to plug the desired number of subblocks and add the memory block
  * to Linux.
  *
  * Will modify the state of the memory block.
  */
-static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm,
-                                     unsigned long mb_id,
-                                     uint64_t *nb_sb)
+static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm,
+                                         unsigned long mb_id, uint64_t *nb_sb)
 {
-       const int count = min_t(int, *nb_sb, vm->nb_sb_per_mb);
-       int rc, rc2;
+       const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb);
+       int rc;
 
        if (WARN_ON_ONCE(!count))
                return -EINVAL;
@@ -1090,7 +1544,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm,
         * Plug the requested number of subblocks before adding it to linux,
         * so that onlining will directly online all plugged subblocks.
         */
-       rc = virtio_mem_mb_plug_sb(vm, mb_id, 0, count);
+       rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count);
        if (rc)
                return rc;
 
@@ -1098,29 +1552,21 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm,
         * Mark the block properly offline before adding it to Linux,
         * so the memory notifiers will find the block in the right state.
         */
-       if (count == vm->nb_sb_per_mb)
-               virtio_mem_mb_set_state(vm, mb_id,
-                                       VIRTIO_MEM_MB_STATE_OFFLINE);
+       if (count == vm->sbm.sbs_per_mb)
+               virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                           VIRTIO_MEM_SBM_MB_OFFLINE);
        else
-               virtio_mem_mb_set_state(vm, mb_id,
-                                       VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
+               virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                           VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
 
        /* Add the memory block to linux - if that fails, try to unplug. */
-       rc = virtio_mem_mb_add(vm, mb_id);
+       rc = virtio_mem_sbm_add_mb(vm, mb_id);
        if (rc) {
-               enum virtio_mem_mb_state new_state = VIRTIO_MEM_MB_STATE_UNUSED;
-
-               dev_err(&vm->vdev->dev,
-                       "adding memory block %lu failed with %d\n", mb_id, rc);
-               rc2 = virtio_mem_mb_unplug_sb(vm, mb_id, 0, count);
+               int new_state = VIRTIO_MEM_SBM_MB_UNUSED;
 
-               /*
-                * TODO: Linux MM does not properly clean up yet in all cases
-                * where adding of memory failed - especially on -ENOMEM.
-                */
-               if (rc2)
-                       new_state = VIRTIO_MEM_MB_STATE_PLUGGED;
-               virtio_mem_mb_set_state(vm, mb_id, new_state);
+               if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count))
+                       new_state = VIRTIO_MEM_SBM_MB_PLUGGED;
+               virtio_mem_sbm_set_mb_state(vm, mb_id, new_state);
                return rc;
        }
 
@@ -1136,8 +1582,9 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm,
  *
  * Note: Can fail after some subblocks were successfully plugged.
  */
-static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id,
-                                    uint64_t *nb_sb, bool online)
+static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
+                                     unsigned long mb_id, uint64_t *nb_sb,
+                                     bool online)
 {
        unsigned long pfn, nr_pages;
        int sb_id, count;
@@ -1147,17 +1594,16 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id,
                return -EINVAL;
 
        while (*nb_sb) {
-               sb_id = virtio_mem_mb_first_unplugged_sb(vm, mb_id);
-               if (sb_id >= vm->nb_sb_per_mb)
+               sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id);
+               if (sb_id >= vm->sbm.sbs_per_mb)
                        break;
                count = 1;
                while (count < *nb_sb &&
-                      sb_id + count < vm->nb_sb_per_mb &&
-                      !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id + count,
-                                                     1))
+                      sb_id + count < vm->sbm.sbs_per_mb &&
+                      !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1))
                        count++;
 
-               rc = virtio_mem_mb_plug_sb(vm, mb_id, sb_id, count);
+               rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count);
                if (rc)
                        return rc;
                *nb_sb -= count;
@@ -1166,29 +1612,26 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id,
 
                /* fake-online the pages if the memory block is online */
                pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
-                              sb_id * vm->subblock_size);
-               nr_pages = PFN_DOWN(count * vm->subblock_size);
+                              sb_id * vm->sbm.sb_size);
+               nr_pages = PFN_DOWN(count * vm->sbm.sb_size);
                virtio_mem_fake_online(pfn, nr_pages);
        }
 
-       if (virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
+       if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
                if (online)
-                       virtio_mem_mb_set_state(vm, mb_id,
-                                               VIRTIO_MEM_MB_STATE_ONLINE);
+                       virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                                   VIRTIO_MEM_SBM_MB_ONLINE);
                else
-                       virtio_mem_mb_set_state(vm, mb_id,
-                                               VIRTIO_MEM_MB_STATE_OFFLINE);
+                       virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                                   VIRTIO_MEM_SBM_MB_OFFLINE);
        }
 
        return 0;
 }
 
-/*
- * Try to plug the requested amount of memory.
- */
-static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)
+static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
 {
-       uint64_t nb_sb = diff / vm->subblock_size;
+       uint64_t nb_sb = diff / vm->sbm.sb_size;
        unsigned long mb_id;
        int rc;
 
@@ -1199,18 +1642,18 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)
        mutex_lock(&vm->hotplug_mutex);
 
        /* Try to plug subblocks of partially plugged online blocks. */
-       virtio_mem_for_each_mb_state(vm, mb_id,
-                                    VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) {
-               rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, true);
+       virtio_mem_sbm_for_each_mb(vm, mb_id,
+                                  VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) {
+               rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, true);
                if (rc || !nb_sb)
                        goto out_unlock;
                cond_resched();
        }
 
        /* Try to plug subblocks of partially plugged offline blocks. */
-       virtio_mem_for_each_mb_state(vm, mb_id,
-                                    VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) {
-               rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, false);
+       virtio_mem_sbm_for_each_mb(vm, mb_id,
+                                  VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
+               rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, false);
                if (rc || !nb_sb)
                        goto out_unlock;
                cond_resched();
@@ -1223,11 +1666,11 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)
        mutex_unlock(&vm->hotplug_mutex);
 
        /* Try to plug and add unused blocks */
-       virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED) {
-               if (virtio_mem_too_many_mb_offline(vm))
+       virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) {
+               if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
                        return -ENOSPC;
 
-               rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb);
+               rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb);
                if (rc || !nb_sb)
                        return rc;
                cond_resched();
@@ -1235,13 +1678,13 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)
 
        /* Try to prepare, plug and add new blocks */
        while (nb_sb) {
-               if (virtio_mem_too_many_mb_offline(vm))
+               if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
                        return -ENOSPC;
 
-               rc = virtio_mem_prepare_next_mb(vm, &mb_id);
+               rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id);
                if (rc)
                        return rc;
-               rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb);
+               rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb);
                if (rc)
                        return rc;
                cond_resched();
@@ -1254,6 +1697,112 @@ out_unlock:
 }
 
 /*
+ * Plug a big block and add it to Linux.
+ *
+ * Will modify the state of the big block.
+ */
+static int virtio_mem_bbm_plug_and_add_bb(struct virtio_mem *vm,
+                                         unsigned long bb_id)
+{
+       int rc;
+
+       if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+                        VIRTIO_MEM_BBM_BB_UNUSED))
+               return -EINVAL;
+
+       rc = virtio_mem_bbm_plug_bb(vm, bb_id);
+       if (rc)
+               return rc;
+       virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
+
+       rc = virtio_mem_bbm_add_bb(vm, bb_id);
+       if (rc) {
+               if (!virtio_mem_bbm_unplug_bb(vm, bb_id))
+                       virtio_mem_bbm_set_bb_state(vm, bb_id,
+                                                   VIRTIO_MEM_BBM_BB_UNUSED);
+               else
+                       /* Retry from the main loop. */
+                       virtio_mem_bbm_set_bb_state(vm, bb_id,
+                                                   VIRTIO_MEM_BBM_BB_PLUGGED);
+               return rc;
+       }
+       return 0;
+}
+
+/*
+ * Prepare tracking data for the next big block.
+ */
+static int virtio_mem_bbm_prepare_next_bb(struct virtio_mem *vm,
+                                         unsigned long *bb_id)
+{
+       int rc;
+
+       if (vm->bbm.next_bb_id > vm->bbm.last_usable_bb_id)
+               return -ENOSPC;
+
+       /* Resize the big block state array if required. */
+       rc = virtio_mem_bbm_bb_states_prepare_next_bb(vm);
+       if (rc)
+               return rc;
+
+       vm->bbm.bb_count[VIRTIO_MEM_BBM_BB_UNUSED]++;
+       *bb_id = vm->bbm.next_bb_id;
+       vm->bbm.next_bb_id++;
+       return 0;
+}
+
+static int virtio_mem_bbm_plug_request(struct virtio_mem *vm, uint64_t diff)
+{
+       uint64_t nb_bb = diff / vm->bbm.bb_size;
+       unsigned long bb_id;
+       int rc;
+
+       if (!nb_bb)
+               return 0;
+
+       /* Try to plug and add unused big blocks */
+       virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED) {
+               if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size))
+                       return -ENOSPC;
+
+               rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id);
+               if (!rc)
+                       nb_bb--;
+               if (rc || !nb_bb)
+                       return rc;
+               cond_resched();
+       }
+
+       /* Try to prepare, plug and add new big blocks */
+       while (nb_bb) {
+               if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size))
+                       return -ENOSPC;
+
+               rc = virtio_mem_bbm_prepare_next_bb(vm, &bb_id);
+               if (rc)
+                       return rc;
+               rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id);
+               if (!rc)
+                       nb_bb--;
+               if (rc)
+                       return rc;
+               cond_resched();
+       }
+
+       return 0;
+}
+
+/*
+ * Try to plug the requested amount of memory.
+ */
+static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)
+{
+       if (vm->in_sbm)
+               return virtio_mem_sbm_plug_request(vm, diff);
+       return virtio_mem_bbm_plug_request(vm, diff);
+}
+
+/*
  * Unplug the desired number of plugged subblocks of an offline memory block.
  * Will fail if any subblock cannot get unplugged (instead of skipping it).
  *
@@ -1262,33 +1811,33 @@ out_unlock:
  *
  * Note: Can fail after some subblocks were successfully unplugged.
  */
-static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm,
-                                              unsigned long mb_id,
-                                              uint64_t *nb_sb)
+static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm,
+                                               unsigned long mb_id,
+                                               uint64_t *nb_sb)
 {
        int rc;
 
-       rc = virtio_mem_mb_unplug_any_sb(vm, mb_id, nb_sb);
+       rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, nb_sb);
 
        /* some subblocks might have been unplugged even on failure */
-       if (!virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb))
-               virtio_mem_mb_set_state(vm, mb_id,
-                                       VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
+       if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
+               virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                           VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
        if (rc)
                return rc;
 
-       if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
+       if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
                /*
                 * Remove the block from Linux - this should never fail.
                 * Hinder the block from getting onlined by marking it
                 * unplugged. Temporarily drop the mutex, so
                 * any pending GOING_ONLINE requests can be serviced/rejected.
                 */
-               virtio_mem_mb_set_state(vm, mb_id,
-                                       VIRTIO_MEM_MB_STATE_UNUSED);
+               virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                           VIRTIO_MEM_SBM_MB_UNUSED);
 
                mutex_unlock(&vm->hotplug_mutex);
-               rc = virtio_mem_mb_remove(vm, mb_id);
+               rc = virtio_mem_sbm_remove_mb(vm, mb_id);
                BUG_ON(rc);
                mutex_lock(&vm->hotplug_mutex);
        }
@@ -1300,38 +1849,31 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm,
  *
  * Will modify the state of the memory block.
  */
-static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm,
-                                         unsigned long mb_id, int sb_id,
-                                         int count)
+static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
+                                          unsigned long mb_id, int sb_id,
+                                          int count)
 {
-       const unsigned long nr_pages = PFN_DOWN(vm->subblock_size) * count;
+       const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count;
        unsigned long start_pfn;
        int rc;
 
        start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
-                            sb_id * vm->subblock_size);
-       rc = alloc_contig_range(start_pfn, start_pfn + nr_pages,
-                               MIGRATE_MOVABLE, GFP_KERNEL);
-       if (rc == -ENOMEM)
-               /* whoops, out of memory */
-               return rc;
-       if (rc)
-               return -EBUSY;
+                            sb_id * vm->sbm.sb_size);
 
-       /* Mark it as fake-offline before unplugging it */
-       virtio_mem_set_fake_offline(start_pfn, nr_pages, true);
-       adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
+       rc = virtio_mem_fake_offline(start_pfn, nr_pages);
+       if (rc)
+               return rc;
 
        /* Try to unplug the allocated memory */
-       rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count);
+       rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count);
        if (rc) {
                /* Return the memory to the buddy. */
                virtio_mem_fake_online(start_pfn, nr_pages);
                return rc;
        }
 
-       virtio_mem_mb_set_state(vm, mb_id,
-                               VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
+       virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                   VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL);
        return 0;
 }
 
@@ -1345,34 +1887,34 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm,
  * Note: Can fail after some subblocks were successfully unplugged. Can
  *       return 0 even if subblocks were busy and could not get unplugged.
  */
-static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm,
-                                             unsigned long mb_id,
-                                             uint64_t *nb_sb)
+static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm,
+                                              unsigned long mb_id,
+                                              uint64_t *nb_sb)
 {
        int rc, sb_id;
 
        /* If possible, try to unplug the complete block in one shot. */
-       if (*nb_sb >= vm->nb_sb_per_mb &&
-           virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
-               rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, 0,
-                                                   vm->nb_sb_per_mb);
+       if (*nb_sb >= vm->sbm.sbs_per_mb &&
+           virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
+               rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0,
+                                                    vm->sbm.sbs_per_mb);
                if (!rc) {
-                       *nb_sb -= vm->nb_sb_per_mb;
+                       *nb_sb -= vm->sbm.sbs_per_mb;
                        goto unplugged;
                } else if (rc != -EBUSY)
                        return rc;
        }
 
        /* Fallback to single subblocks. */
-       for (sb_id = vm->nb_sb_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) {
+       for (sb_id = vm->sbm.sbs_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) {
                /* Find the next candidate subblock */
                while (sb_id >= 0 &&
-                      !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+                      !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
                        sb_id--;
                if (sb_id < 0)
                        break;
 
-               rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, sb_id, 1);
+               rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1);
                if (rc == -EBUSY)
                        continue;
                else if (rc)
@@ -1386,24 +1928,21 @@ unplugged:
         * remove it. This will usually not fail, as no memory is in use
         * anymore - however some other notifiers might NACK the request.
         */
-       if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) {
+       if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
                mutex_unlock(&vm->hotplug_mutex);
-               rc = virtio_mem_mb_offline_and_remove(vm, mb_id);
+               rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
                mutex_lock(&vm->hotplug_mutex);
                if (!rc)
-                       virtio_mem_mb_set_state(vm, mb_id,
-                                               VIRTIO_MEM_MB_STATE_UNUSED);
+                       virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                                   VIRTIO_MEM_SBM_MB_UNUSED);
        }
 
        return 0;
 }
 
-/*
- * Try to unplug the requested amount of memory.
- */
-static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
+static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
 {
-       uint64_t nb_sb = diff / vm->subblock_size;
+       uint64_t nb_sb = diff / vm->sbm.sb_size;
        unsigned long mb_id;
        int rc;
 
@@ -1418,20 +1957,17 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
        mutex_lock(&vm->hotplug_mutex);
 
        /* Try to unplug subblocks of partially plugged offline blocks. */
-       virtio_mem_for_each_mb_state_rev(vm, mb_id,
-                                        VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) {
-               rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id,
-                                                        &nb_sb);
+       virtio_mem_sbm_for_each_mb_rev(vm, mb_id,
+                                      VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
+               rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb);
                if (rc || !nb_sb)
                        goto out_unlock;
                cond_resched();
        }
 
        /* Try to unplug subblocks of plugged offline blocks. */
-       virtio_mem_for_each_mb_state_rev(vm, mb_id,
-                                        VIRTIO_MEM_MB_STATE_OFFLINE) {
-               rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id,
-                                                        &nb_sb);
+       virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE) {
+               rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb);
                if (rc || !nb_sb)
                        goto out_unlock;
                cond_resched();
@@ -1443,10 +1979,9 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
        }
 
        /* Try to unplug subblocks of partially plugged online blocks. */
-       virtio_mem_for_each_mb_state_rev(vm, mb_id,
-                                        VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) {
-               rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id,
-                                                       &nb_sb);
+       virtio_mem_sbm_for_each_mb_rev(vm, mb_id,
+                                      VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) {
+               rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb);
                if (rc || !nb_sb)
                        goto out_unlock;
                mutex_unlock(&vm->hotplug_mutex);
@@ -1455,10 +1990,8 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
        }
 
        /* Try to unplug subblocks of plugged online blocks. */
-       virtio_mem_for_each_mb_state_rev(vm, mb_id,
-                                        VIRTIO_MEM_MB_STATE_ONLINE) {
-               rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id,
-                                                       &nb_sb);
+       virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE) {
+               rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb);
                if (rc || !nb_sb)
                        goto out_unlock;
                mutex_unlock(&vm->hotplug_mutex);
@@ -1474,19 +2007,211 @@ out_unlock:
 }
 
 /*
+ * Try to offline and remove a big block from Linux and unplug it. Will fail
+ * with -EBUSY if some memory is busy and cannot get unplugged.
+ *
+ * Will modify the state of the memory block. Might temporarily drop the
+ * hotplug_mutex.
+ */
+static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
+                                                      unsigned long bb_id)
+{
+       const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
+       const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
+       unsigned long end_pfn = start_pfn + nr_pages;
+       unsigned long pfn;
+       struct page *page;
+       int rc;
+
+       if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+                        VIRTIO_MEM_BBM_BB_ADDED))
+               return -EINVAL;
+
+       if (bbm_safe_unplug) {
+               /*
+                * Start by fake-offlining all memory. Once we marked the device
+                * block as fake-offline, all newly onlined memory will
+                * automatically be kept fake-offline. Protect from concurrent
+                * onlining/offlining until we have a consistent state.
+                */
+               mutex_lock(&vm->hotplug_mutex);
+               virtio_mem_bbm_set_bb_state(vm, bb_id,
+                                           VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
+
+               for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+                       page = pfn_to_online_page(pfn);
+                       if (!page)
+                               continue;
+
+                       rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION);
+                       if (rc) {
+                               end_pfn = pfn;
+                               goto rollback_safe_unplug;
+                       }
+               }
+               mutex_unlock(&vm->hotplug_mutex);
+       }
+
+       rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
+       if (rc) {
+               if (bbm_safe_unplug) {
+                       mutex_lock(&vm->hotplug_mutex);
+                       goto rollback_safe_unplug;
+               }
+               return rc;
+       }
+
+       rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
+       if (rc)
+               virtio_mem_bbm_set_bb_state(vm, bb_id,
+                                           VIRTIO_MEM_BBM_BB_PLUGGED);
+       else
+               virtio_mem_bbm_set_bb_state(vm, bb_id,
+                                           VIRTIO_MEM_BBM_BB_UNUSED);
+       return rc;
+
+rollback_safe_unplug:
+       for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+               page = pfn_to_online_page(pfn);
+               if (!page)
+                       continue;
+               virtio_mem_fake_online(pfn, PAGES_PER_SECTION);
+       }
+       virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
+       mutex_unlock(&vm->hotplug_mutex);
+       return rc;
+}
+
+/*
+ * Try to remove a big block from Linux and unplug it. Will fail with
+ * -EBUSY if some memory is online.
+ *
+ * Will modify the state of the memory block.
+ */
+static int virtio_mem_bbm_remove_and_unplug_bb(struct virtio_mem *vm,
+                                              unsigned long bb_id)
+{
+       int rc;
+
+       if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
+                        VIRTIO_MEM_BBM_BB_ADDED))
+               return -EINVAL;
+
+       rc = virtio_mem_bbm_remove_bb(vm, bb_id);
+       if (rc)
+               return -EBUSY;
+
+       rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
+       if (rc)
+               virtio_mem_bbm_set_bb_state(vm, bb_id,
+                                           VIRTIO_MEM_BBM_BB_PLUGGED);
+       else
+               virtio_mem_bbm_set_bb_state(vm, bb_id,
+                                           VIRTIO_MEM_BBM_BB_UNUSED);
+       return rc;
+}
+
+/*
+ * Test if a big block is completely offline.
+ */
+static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm,
+                                        unsigned long bb_id)
+{
+       const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
+       const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
+       unsigned long pfn;
+
+       for (pfn = start_pfn; pfn < start_pfn + nr_pages;
+            pfn += PAGES_PER_SECTION) {
+               if (pfn_to_online_page(pfn))
+                       return false;
+       }
+
+       return true;
+}
+
+static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
+{
+       uint64_t nb_bb = diff / vm->bbm.bb_size;
+       uint64_t bb_id;
+       int rc;
+
+       if (!nb_bb)
+               return 0;
+
+       /* Try to unplug completely offline big blocks first. */
+       virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
+               cond_resched();
+               /*
+                * As we're holding no locks, this check is racy as memory
+                * can get onlined in the meantime - but we'll fail gracefully.
+                */
+               if (!virtio_mem_bbm_bb_is_offline(vm, bb_id))
+                       continue;
+               rc = virtio_mem_bbm_remove_and_unplug_bb(vm, bb_id);
+               if (rc == -EBUSY)
+                       continue;
+               if (!rc)
+                       nb_bb--;
+               if (rc || !nb_bb)
+                       return rc;
+       }
+
+       if (!unplug_online)
+               return 0;
+
+       /* Try to unplug any big blocks. */
+       virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
+               cond_resched();
+               rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id);
+               if (rc == -EBUSY)
+                       continue;
+               if (!rc)
+                       nb_bb--;
+               if (rc || !nb_bb)
+                       return rc;
+       }
+
+       return nb_bb ? -EBUSY : 0;
+}
+
+/*
+ * Try to unplug the requested amount of memory.
+ */
+static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
+{
+       if (vm->in_sbm)
+               return virtio_mem_sbm_unplug_request(vm, diff);
+       return virtio_mem_bbm_unplug_request(vm, diff);
+}
+
+/*
  * Try to unplug all blocks that couldn't be unplugged before, for example,
  * because the hypervisor was busy.
  */
 static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
 {
-       unsigned long mb_id;
+       unsigned long id;
        int rc;
 
-       virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_PLUGGED) {
-               rc = virtio_mem_mb_unplug(vm, mb_id);
+       if (!vm->in_sbm) {
+               virtio_mem_bbm_for_each_bb(vm, id,
+                                          VIRTIO_MEM_BBM_BB_PLUGGED) {
+                       rc = virtio_mem_bbm_unplug_bb(vm, id);
+                       if (rc)
+                               return rc;
+                       virtio_mem_bbm_set_bb_state(vm, id,
+                                                   VIRTIO_MEM_BBM_BB_UNUSED);
+               }
+               return 0;
+       }
+
+       virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_PLUGGED) {
+               rc = virtio_mem_sbm_unplug_mb(vm, id);
                if (rc)
                        return rc;
-               virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED);
+               virtio_mem_sbm_set_mb_state(vm, id,
+                                           VIRTIO_MEM_SBM_MB_UNUSED);
        }
 
        return 0;
@@ -1511,7 +2236,13 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm)
                        usable_region_size, &usable_region_size);
        end_addr = vm->addr + usable_region_size;
        end_addr = min(end_addr, phys_limit);
-       vm->last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1;
+
+       if (vm->in_sbm)
+               vm->sbm.last_usable_mb_id =
+                                        virtio_mem_phys_to_mb_id(end_addr) - 1;
+       else
+               vm->bbm.last_usable_bb_id =
+                                    virtio_mem_phys_to_bb_id(vm, end_addr) - 1;
 
        /* see if there is a request to change the size */
        virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size,
@@ -1535,6 +2266,7 @@ static void virtio_mem_run_wq(struct work_struct *work)
        if (vm->broken)
                return;
 
+       atomic_set(&vm->wq_active, 1);
 retry:
        rc = 0;
 
@@ -1595,6 +2327,8 @@ retry:
                        "unknown error, marking device broken: %d\n", rc);
                vm->broken = true;
        }
+
+       atomic_set(&vm->wq_active, 0);
 }
 
 static enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer)
@@ -1631,6 +2365,7 @@ static int virtio_mem_init_vq(struct virtio_mem *vm)
 static int virtio_mem_init(struct virtio_mem *vm)
 {
        const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
+       uint64_t sb_size, addr;
        uint16_t node_id;
 
        if (!vm->vdev->config->get) {
@@ -1659,15 +2394,9 @@ static int virtio_mem_init(struct virtio_mem *vm)
        virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size,
                        &vm->region_size);
 
-       /*
-        * We always hotplug memory in memory block granularity. This way,
-        * we have to wait for exactly one memory block to online.
-        */
-       if (vm->device_block_size > memory_block_size_bytes()) {
-               dev_err(&vm->vdev->dev,
-                       "The block size is not supported (too big).\n");
-               return -EINVAL;
-       }
+       /* Determine the nid for the device based on the lowest address. */
+       if (vm->nid == NUMA_NO_NODE)
+               vm->nid = memory_add_physaddr_to_nid(vm->addr);
 
        /* bad device setup - warn only */
        if (!IS_ALIGNED(vm->addr, memory_block_size_bytes()))
@@ -1681,23 +2410,57 @@ static int virtio_mem_init(struct virtio_mem *vm)
                         "Some memory is not addressable. This can make some memory unusable.\n");
 
        /*
-        * Calculate the subblock size:
-        * - At least MAX_ORDER - 1 / pageblock_order.
-        * - At least the device block size.
-        * In the worst case, a single subblock per memory block.
+        * We want subblocks to span at least MAX_ORDER_NR_PAGES and
+        * pageblock_nr_pages pages. This:
+        * - Simplifies our page onlining code (virtio_mem_online_page_cb)
+        *   and fake page onlining code (virtio_mem_fake_online).
+        * - Is required for now for alloc_contig_range() to work reliably -
+        *   it doesn't properly handle smaller granularity on ZONE_NORMAL.
         */
-       vm->subblock_size = PAGE_SIZE * 1ul << max_t(uint32_t, MAX_ORDER - 1,
-                                                    pageblock_order);
-       vm->subblock_size = max_t(uint64_t, vm->device_block_size,
-                                 vm->subblock_size);
-       vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size;
-
-       /* Round up to the next full memory block */
-       vm->first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 +
-                                                  memory_block_size_bytes());
-       vm->next_mb_id = vm->first_mb_id;
-       vm->last_mb_id = virtio_mem_phys_to_mb_id(vm->addr +
-                        vm->region_size) - 1;
+       sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES,
+                       pageblock_nr_pages) * PAGE_SIZE;
+       sb_size = max_t(uint64_t, vm->device_block_size, sb_size);
+
+       if (sb_size < memory_block_size_bytes() && !force_bbm) {
+               /* SBM: At least two subblocks per Linux memory block. */
+               vm->in_sbm = true;
+               vm->sbm.sb_size = sb_size;
+               vm->sbm.sbs_per_mb = memory_block_size_bytes() /
+                                    vm->sbm.sb_size;
+
+               /* Round up to the next full memory block */
+               addr = vm->addr + memory_block_size_bytes() - 1;
+               vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr);
+               vm->sbm.next_mb_id = vm->sbm.first_mb_id;
+       } else {
+               /* BBM: At least one Linux memory block. */
+               vm->bbm.bb_size = max_t(uint64_t, vm->device_block_size,
+                                       memory_block_size_bytes());
+
+               if (bbm_block_size) {
+                       if (!is_power_of_2(bbm_block_size)) {
+                               dev_warn(&vm->vdev->dev,
+                                        "bbm_block_size is not a power of 2");
+                       } else if (bbm_block_size < vm->bbm.bb_size) {
+                               dev_warn(&vm->vdev->dev,
+                                        "bbm_block_size is too small");
+                       } else {
+                               vm->bbm.bb_size = bbm_block_size;
+                       }
+               }
+
+               /* Round up to the next aligned big block */
+               addr = vm->addr + vm->bbm.bb_size - 1;
+               vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr);
+               vm->bbm.next_bb_id = vm->bbm.first_bb_id;
+       }
+
+       /* Prepare the offline threshold - make sure we can add two blocks. */
+       vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
+                                     VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
+       /* In BBM, we also want at least two big blocks. */
+       vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size,
+                                     vm->offline_threshold);
 
        dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
        dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
@@ -1705,9 +2468,13 @@ static int virtio_mem_init(struct virtio_mem *vm)
                 (unsigned long long)vm->device_block_size);
        dev_info(&vm->vdev->dev, "memory block size: 0x%lx",
                 memory_block_size_bytes());
-       dev_info(&vm->vdev->dev, "subblock size: 0x%llx",
-                (unsigned long long)vm->subblock_size);
-       if (vm->nid != NUMA_NO_NODE)
+       if (vm->in_sbm)
+               dev_info(&vm->vdev->dev, "subblock size: 0x%llx",
+                        (unsigned long long)vm->sbm.sb_size);
+       else
+               dev_info(&vm->vdev->dev, "big block size: 0x%llx",
+                        (unsigned long long)vm->bbm.bb_size);
+       if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA))
                dev_info(&vm->vdev->dev, "nid: %d", vm->nid);
 
        return 0;
@@ -1753,6 +2520,20 @@ static void virtio_mem_delete_resource(struct virtio_mem *vm)
        vm->parent_resource = NULL;
 }
 
+static int virtio_mem_range_has_system_ram(struct resource *res, void *arg)
+{
+       return 1;
+}
+
+static bool virtio_mem_has_memory_added(struct virtio_mem *vm)
+{
+       const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+       return walk_iomem_res_desc(IORES_DESC_NONE, flags, vm->addr,
+                                  vm->addr + vm->region_size, NULL,
+                                  virtio_mem_range_has_system_ram) == 1;
+}
+
 static int virtio_mem_probe(struct virtio_device *vdev)
 {
        struct virtio_mem *vm;
@@ -1849,21 +2630,24 @@ static void virtio_mem_remove(struct virtio_device *vdev)
        cancel_work_sync(&vm->wq);
        hrtimer_cancel(&vm->retry_timer);
 
-       /*
-        * After we unregistered our callbacks, user space can online partially
-        * plugged offline blocks. Make sure to remove them.
-        */
-       virtio_mem_for_each_mb_state(vm, mb_id,
-                                    VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) {
-               rc = virtio_mem_mb_remove(vm, mb_id);
-               BUG_ON(rc);
-               virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED);
+       if (vm->in_sbm) {
+               /*
+                * After we unregistered our callbacks, user space can online
+                * partially plugged offline blocks. Make sure to remove them.
+                */
+               virtio_mem_sbm_for_each_mb(vm, mb_id,
+                                          VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
+                       rc = virtio_mem_sbm_remove_mb(vm, mb_id);
+                       BUG_ON(rc);
+                       virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                                   VIRTIO_MEM_SBM_MB_UNUSED);
+               }
+               /*
+                * After we unregistered our callbacks, user space can no longer
+                * offline partially plugged online memory blocks. No need to
+                * worry about them.
+                */
        }
-       /*
-        * After we unregistered our callbacks, user space can no longer
-        * offline partially plugged online memory blocks. No need to worry
-        * about them.
-        */
 
        /* unregister callbacks */
        unregister_virtio_mem_device(vm);
@@ -1874,10 +2658,7 @@ static void virtio_mem_remove(struct virtio_device *vdev)
         * the system. And there is no way to stop the driver/device from going
         * away. Warn at least.
         */
-       if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] ||
-           vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] ||
-           vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] ||
-           vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL]) {
+       if (virtio_mem_has_memory_added(vm)) {
                dev_warn(&vdev->dev, "device still has system memory added\n");
        } else {
                virtio_mem_delete_resource(vm);
@@ -1885,8 +2666,12 @@ static void virtio_mem_remove(struct virtio_device *vdev)
        }
 
        /* remove all tracking data - no locking needed */
-       vfree(vm->mb_state);
-       vfree(vm->sb_bitmap);
+       if (vm->in_sbm) {
+               vfree(vm->sbm.mb_states);
+               vfree(vm->sbm.sb_states);
+       } else {
+               vfree(vm->bbm.bb_states);
+       }
 
        /* reset the device and cleanup the queues */
        vdev->config->reset(vdev);
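
The SBM/BBM decision above reduces to a size comparison: compute the smallest usable subblock, and fall back to big-block mode when a Linux memory block would no longer split into at least two subblocks. A standalone sketch of that selection logic, with made-up constants standing in for PAGE_SIZE, MAX_ORDER_NR_PAGES, pageblock_nr_pages and memory_block_size_bytes() (all kernel-internal), might look like:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel-provided values. */
#define PAGE_SIZE           4096ULL
#define MAX_ORDER_NR_PAGES  1024ULL          /* 4 MiB worth of pages */
#define PAGEBLOCK_NR_PAGES  512ULL           /* 2 MiB worth of pages */
#define MEMORY_BLOCK_SIZE   (128ULL << 20)   /* 128 MiB */

static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }

int main(void)
{
        uint64_t device_block_size = 2ULL << 20; /* from device config */
        bool force_bbm = false;

        /* Subblocks must span MAX_ORDER_NR_PAGES and pageblock_nr_pages. */
        uint64_t sb_size = max_u64(MAX_ORDER_NR_PAGES,
                                   PAGEBLOCK_NR_PAGES) * PAGE_SIZE;
        sb_size = max_u64(device_block_size, sb_size);

        if (sb_size < MEMORY_BLOCK_SIZE && !force_bbm)
                printf("SBM: %llu subblocks of %llu bytes per memory block\n",
                       (unsigned long long)(MEMORY_BLOCK_SIZE / sb_size),
                       (unsigned long long)sb_size);
        else
                printf("BBM: big block size %llu bytes\n",
                       (unsigned long long)max_u64(device_block_size,
                                                   MEMORY_BLOCK_SIZE));
        return 0;
}
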
index becc776..71e16b5 100644
@@ -1608,7 +1608,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
        vq->num_added = 0;
        vq->packed_ring = true;
        vq->use_dma_api = vring_use_dma_api(vdev);
-       list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
        vq->in_use = false;
        vq->last_add_time_valid = false;
@@ -1669,6 +1668,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
                        cpu_to_le16(vq->packed.event_flags_shadow);
        }
 
+       list_add_tail(&vq->vq.list, &vdev->vqs);
        return &vq->vq;
 
 err_desc_extra:
@@ -1676,9 +1676,9 @@ err_desc_extra:
 err_desc_state:
        kfree(vq);
 err_vq:
-       vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr);
+       vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
 err_device:
-       vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr);
+       vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
 err_driver:
        vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
 err_ring:
@@ -2085,7 +2085,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
        vq->last_used_idx = 0;
        vq->num_added = 0;
        vq->use_dma_api = vring_use_dma_api(vdev);
-       list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
        vq->in_use = false;
        vq->last_add_time_valid = false;
@@ -2127,6 +2126,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
        memset(vq->split.desc_state, 0, vring.num *
                        sizeof(struct vring_desc_state_split));
 
+       list_add_tail(&vq->vq.list, &vdev->vqs);
        return &vq->vq;
 }
 EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
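
Both vring hunks make the same change: the virtqueue is appended to vdev->vqs only after every field is initialized, so the error unwind never has to take a half-built queue back off the list. A user-space miniature of this publish-last pattern (the types and names here are illustrative, not the real vring structures):

#include <stdlib.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h)
{
        h->prev = h->next = h;
}

static void list_add_tail(struct list_head *new, struct list_head *head)
{
        new->prev = head->prev;
        new->next = head;
        head->prev->next = new;
        head->prev = new;
}

struct queue {
        struct list_head list;
        void *ring;                     /* set up during creation */
};

/* Returns NULL on failure; 'all' never sees a partially built queue. */
static struct queue *queue_create(struct list_head *all)
{
        struct queue *q = calloc(1, sizeof(*q));

        if (!q)
                return NULL;

        q->ring = malloc(4096);
        if (!q->ring) {
                free(q);                /* not published: no unlink needed */
                return NULL;
        }

        list_add_tail(&q->list, all);   /* publish only when complete */
        return q;
}
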
index f22e373..7ff941e 100644
@@ -386,6 +386,7 @@ config ARM_SBSA_WATCHDOG
 config ARMADA_37XX_WATCHDOG
        tristate "Armada 37xx watchdog"
        depends on ARCH_MVEBU || COMPILE_TEST
+       depends on HAS_IOMEM
        select MFD_SYSCON
        select WATCHDOG_CORE
        help
@@ -631,7 +632,7 @@ config SUNXI_WATCHDOG
 
 config COH901327_WATCHDOG
        bool "ST-Ericsson COH 901 327 watchdog"
-       depends on ARCH_U300 || (ARM && COMPILE_TEST)
+       depends on ARCH_U300 || (ARM && COMMON_CLK && COMPILE_TEST)
        default y if MACH_U300
        select WATCHDOG_CORE
        help
@@ -789,6 +790,7 @@ config MOXART_WDT
 
 config SIRFSOC_WATCHDOG
        tristate "SiRFSOC watchdog"
+       depends on HAS_IOMEM
        depends on ARCH_SIRF || COMPILE_TEST
        select WATCHDOG_CORE
        default y
@@ -1696,16 +1698,6 @@ config WDT_MTX1
          Hardware driver for the MTX-1 boards. This is a watchdog timer that
          will reboot the machine after a 100 seconds timer expired.
 
-config PNX833X_WDT
-       tristate "PNX833x Hardware Watchdog"
-       depends on SOC_PNX8335
-       depends on BROKEN
-       help
-         Hardware driver for the PNX833x's watchdog. This is a
-         watchdog timer that will reboot the machine after a programmable
-         timer has expired and no process has written to /dev/watchdog during
-         that time.
-
 config SIBYTE_WDOG
        tristate "Sibyte SoC hardware watchdog"
        depends on CPU_SB1 || (MIPS && COMPILE_TEST)
index 071a2e5..5c74ee1 100644
@@ -161,7 +161,6 @@ obj-$(CONFIG_RC32434_WDT) += rc32434_wdt.o
 obj-$(CONFIG_INDYDOG) += indydog.o
 obj-$(CONFIG_JZ4740_WDT) += jz4740_wdt.o
 obj-$(CONFIG_WDT_MTX1) += mtx-1_wdt.o
-obj-$(CONFIG_PNX833X_WDT) += pnx833x_wdt.o
 obj-$(CONFIG_SIBYTE_WDOG) += sb_wdog.o
 obj-$(CONFIG_AR7_WDT) += ar7_wdt.o
 obj-$(CONFIG_TXX9_WDT) += txx9wdt.o
index 8341892..0b699c7 100644
@@ -150,8 +150,6 @@ static long geodewdt_ioctl(struct file *file, unsigned int cmd,
        case WDIOC_GETSUPPORT:
                return copy_to_user(argp, &ident,
                                    sizeof(ident)) ? -EFAULT : 0;
-               break;
-
        case WDIOC_GETSTATUS:
        case WDIOC_GETBOOTSTATUS:
                return put_user(0, p);
index 7d34bcf..cbd1498 100644
@@ -21,8 +21,9 @@
 #include <linux/types.h>
 #include <linux/watchdog.h>
 #include <asm/nmi.h>
+#include <linux/crash_dump.h>
 
-#define HPWDT_VERSION                  "2.0.3"
+#define HPWDT_VERSION                  "2.0.4"
 #define SECS_TO_TICKS(secs)            ((secs) * 1000 / 128)
 #define TICKS_TO_SECS(ticks)           ((ticks) * 128 / 1000)
 #define HPWDT_MAX_TICKS                        65535
@@ -334,6 +335,11 @@ static int hpwdt_init_one(struct pci_dev *dev,
        watchdog_set_nowayout(&hpwdt_dev, nowayout);
        watchdog_init_timeout(&hpwdt_dev, soft_margin, NULL);
 
+       if (is_kdump_kernel()) {
+               pretimeout = 0;
+               kdumptimeout = 0;
+       }
+
        if (pretimeout && hpwdt_dev.timeout <= PRETIMEOUT_SEC) {
                dev_warn(&dev->dev, "timeout <= pretimeout. Setting pretimeout to zero\n");
                pretimeout = 0;
index a370a18..bf31d7b 100644
@@ -40,8 +40,6 @@
  *     Includes, defines, variables, module parameters, ...
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
 /* Module and version information */
 #define DRV_NAME       "iTCO_wdt"
 #define DRV_VERSION    "1.11"
@@ -279,7 +277,7 @@ static int iTCO_wdt_start(struct watchdog_device *wd_dev)
        /* disable chipset's NO_REBOOT bit */
        if (p->update_no_reboot_bit(p->no_reboot_priv, false)) {
                spin_unlock(&p->io_lock);
-               pr_err("failed to reset NO_REBOOT flag, reboot disabled by hardware/BIOS\n");
+               dev_err(wd_dev->parent, "failed to reset NO_REBOOT flag, reboot disabled by hardware/BIOS\n");
                return -EIO;
        }
 
@@ -510,7 +508,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
        /* Check chipset's NO_REBOOT bit */
        if (p->update_no_reboot_bit(p->no_reboot_priv, false) &&
            iTCO_vendor_check_noreboot_on()) {
-               pr_info("unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n");
+               dev_info(dev, "unable to reset NO_REBOOT flag, device disabled by hardware/BIOS\n");
                return -ENODEV; /* Cannot reset NO_REBOOT bit */
        }
 
@@ -530,12 +528,12 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
        if (!devm_request_region(dev, p->tco_res->start,
                                 resource_size(p->tco_res),
                                 pdev->name)) {
-               pr_err("I/O address 0x%04llx already in use, device disabled\n",
+               dev_err(dev, "I/O address 0x%04llx already in use, device disabled\n",
                       (u64)TCOBASE(p));
                return -EBUSY;
        }
 
-       pr_info("Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n",
+       dev_info(dev, "Found a %s TCO device (Version=%d, TCOBASE=0x%04llx)\n",
                pdata->name, pdata->version, (u64)TCOBASE(p));
 
        /* Clear out the (probably old) status */
@@ -558,7 +556,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
                break;
        }
 
-       p->wddev.info = &ident,
+       p->wddev.info = &ident,
        p->wddev.ops = &iTCO_wdt_ops,
        p->wddev.bootstatus = 0;
        p->wddev.timeout = WATCHDOG_TIMEOUT;
@@ -575,7 +573,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
           if not reset to the default */
        if (iTCO_wdt_set_timeout(&p->wddev, heartbeat)) {
                iTCO_wdt_set_timeout(&p->wddev, WATCHDOG_TIMEOUT);
-               pr_info("timeout value out of range, using %d\n",
+               dev_info(dev, "timeout value out of range, using %d\n",
                        WATCHDOG_TIMEOUT);
        }
 
@@ -583,11 +581,11 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
        watchdog_stop_on_unregister(&p->wddev);
        ret = devm_watchdog_register_device(dev, &p->wddev);
        if (ret != 0) {
-               pr_err("cannot register watchdog device (err=%d)\n", ret);
+               dev_err(dev, "cannot register watchdog device (err=%d)\n", ret);
                return ret;
        }
 
-       pr_info("initialized. heartbeat=%d sec (nowayout=%d)\n",
+       dev_info(dev, "initialized. heartbeat=%d sec (nowayout=%d)\n",
                heartbeat, nowayout);
 
        return 0;
@@ -651,21 +649,7 @@ static struct platform_driver iTCO_wdt_driver = {
        },
 };
 
-static int __init iTCO_wdt_init_module(void)
-{
-       pr_info("Intel TCO WatchDog Timer Driver v%s\n", DRV_VERSION);
-
-       return platform_driver_register(&iTCO_wdt_driver);
-}
-
-static void __exit iTCO_wdt_cleanup_module(void)
-{
-       platform_driver_unregister(&iTCO_wdt_driver);
-       pr_info("Watchdog Module Unloaded\n");
-}
-
-module_init(iTCO_wdt_init_module);
-module_exit(iTCO_wdt_cleanup_module);
+module_platform_driver(iTCO_wdt_driver);
 
 MODULE_AUTHOR("Wim Van Sebroeck <wim@iguana.be>");
 MODULE_DESCRIPTION("Intel TCO WatchDog Timer Driver");
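
The conversion above is mechanical: once a module's init and exit functions do nothing beyond registering and unregistering a platform driver (the version banner being the sole casualty), module_platform_driver() can generate both. A minimal, hypothetical driver showing what the macro expands around:

#include <linux/module.h>
#include <linux/platform_device.h>

static int demo_probe(struct platform_device *pdev)
{
        return 0;
}

static struct platform_driver demo_driver = {
        .probe = demo_probe,
        .driver = {
                .name = "demo-wdt",     /* hypothetical device name */
        },
};

/*
 * Expands to module_init()/module_exit() stubs that call
 * platform_driver_register()/platform_driver_unregister().
 */
module_platform_driver(demo_driver);

MODULE_LICENSE("GPL");
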
index 3fc457b..2f7ded3 100644
@@ -175,8 +175,8 @@ static int mpc8xxx_wdt_probe(struct platform_device *ofdev)
 
        spin_lock_init(&ddata->lock);
 
-       ddata->wdd.info = &mpc8xxx_wdt_info,
-       ddata->wdd.ops = &mpc8xxx_wdt_ops,
+       ddata->wdd.info = &mpc8xxx_wdt_info;
+       ddata->wdd.ops = &mpc8xxx_wdt_ops;
 
        ddata->wdd.timeout = WATCHDOG_TIMEOUT;
        watchdog_init_timeout(&ddata->wdd, timeout, dev);
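
The mpc8xxx hunk replaces trailing commas with semicolons. The commas compiled cleanly because C's comma operator fuses consecutive assignments into a single expression statement; nothing misbehaves until someone edits around the "statements". A small user-space illustration:

#include <stdio.h>

struct wdd { const char *info; const char *ops; };

int main(void)
{
        struct wdd w;

        /*
         * One expression statement, not two: the comma operator chains
         * the assignments. Harmless here, but wrap the first line in an
         * 'if' and the second assignment silently becomes conditional
         * too.
         */
        w.info = "mpc8xxx_wdt_info",
        w.ops = "mpc8xxx_wdt_ops";

        printf("%s / %s\n", w.info, w.ops);
        return 0;
}
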
diff --git a/drivers/watchdog/pnx833x_wdt.c b/drivers/watchdog/pnx833x_wdt.c
deleted file mode 100644
index 4097d07..0000000
+++ /dev/null
@@ -1,277 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  PNX833x Hardware Watchdog Driver
- *  Copyright 2008 NXP Semiconductors
- *  Daniel Laird <daniel.j.laird@nxp.com>
- *  Andre McCurdy <andre.mccurdy@nxp.com>
- *
- *  Heavily based upon - IndyDog       0.3
- *  A Hardware Watchdog Device for SGI IP22
- *
- * (c) Copyright 2002 Guido Guenther <agx@sigxcpu.org>, All Rights Reserved.
- *
- * based on softdog.c by Alan Cox <alan@redhat.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/miscdevice.h>
-#include <linux/watchdog.h>
-#include <linux/notifier.h>
-#include <linux/reboot.h>
-#include <linux/init.h>
-#include <asm/mach-pnx833x/pnx833x.h>
-
-#define WATCHDOG_TIMEOUT 30            /* 30 sec Maximum timeout */
-#define WATCHDOG_COUNT_FREQUENCY 68000000U /* Watchdog counts at 68MHZ. */
-#define        PNX_WATCHDOG_TIMEOUT    (WATCHDOG_TIMEOUT * WATCHDOG_COUNT_FREQUENCY)
-#define PNX_TIMEOUT_VALUE      2040000000U
-
-/** CONFIG block */
-#define PNX833X_CONFIG                      (0x07000U)
-#define PNX833X_CONFIG_CPU_WATCHDOG         (0x54)
-#define PNX833X_CONFIG_CPU_WATCHDOG_COMPARE (0x58)
-#define PNX833X_CONFIG_CPU_COUNTERS_CONTROL (0x1c)
-
-/** RESET block */
-#define PNX833X_RESET                       (0x08000U)
-#define PNX833X_RESET_CONFIG                (0x08)
-
-static int pnx833x_wdt_alive;
-
-/* Set default timeout in MHZ.*/
-static int pnx833x_wdt_timeout = PNX_WATCHDOG_TIMEOUT;
-module_param(pnx833x_wdt_timeout, int, 0);
-MODULE_PARM_DESC(timeout, "Watchdog timeout in Mhz. (68Mhz clock), default="
-                       __MODULE_STRING(PNX_TIMEOUT_VALUE) "(30 seconds).");
-
-static bool nowayout = WATCHDOG_NOWAYOUT;
-module_param(nowayout, bool, 0);
-MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
-                                       __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-
-#define START_DEFAULT  1
-static int start_enabled = START_DEFAULT;
-module_param(start_enabled, int, 0);
-MODULE_PARM_DESC(start_enabled, "Watchdog is started on module insertion "
-                               "(default=" __MODULE_STRING(START_DEFAULT) ")");
-
-static void pnx833x_wdt_start(void)
-{
-       /* Enable watchdog causing reset. */
-       PNX833X_REG(PNX833X_RESET + PNX833X_RESET_CONFIG) |= 0x1;
-       /* Set timeout.*/
-       PNX833X_REG(PNX833X_CONFIG +
-               PNX833X_CONFIG_CPU_WATCHDOG_COMPARE) = pnx833x_wdt_timeout;
-       /* Enable watchdog. */
-       PNX833X_REG(PNX833X_CONFIG +
-                               PNX833X_CONFIG_CPU_COUNTERS_CONTROL) |= 0x1;
-
-       pr_info("Started watchdog timer\n");
-}
-
-static void pnx833x_wdt_stop(void)
-{
-       /* Disable watchdog causing reset. */
-       PNX833X_REG(PNX833X_RESET + PNX833X_CONFIG) &= 0xFFFFFFFE;
-       /* Disable watchdog.*/
-       PNX833X_REG(PNX833X_CONFIG +
-                       PNX833X_CONFIG_CPU_COUNTERS_CONTROL) &= 0xFFFFFFFE;
-
-       pr_info("Stopped watchdog timer\n");
-}
-
-static void pnx833x_wdt_ping(void)
-{
-       PNX833X_REG(PNX833X_CONFIG +
-               PNX833X_CONFIG_CPU_WATCHDOG_COMPARE) = pnx833x_wdt_timeout;
-}
-
-/*
- *     Allow only one person to hold it open
- */
-static int pnx833x_wdt_open(struct inode *inode, struct file *file)
-{
-       if (test_and_set_bit(0, &pnx833x_wdt_alive))
-               return -EBUSY;
-
-       if (nowayout)
-               __module_get(THIS_MODULE);
-
-       /* Activate timer */
-       if (!start_enabled)
-               pnx833x_wdt_start();
-
-       pnx833x_wdt_ping();
-
-       pr_info("Started watchdog timer\n");
-
-       return stream_open(inode, file);
-}
-
-static int pnx833x_wdt_release(struct inode *inode, struct file *file)
-{
-       /* Shut off the timer.
-        * Lock it in if it's a module and we defined ...NOWAYOUT */
-       if (!nowayout)
-               pnx833x_wdt_stop(); /* Turn the WDT off */
-
-       clear_bit(0, &pnx833x_wdt_alive);
-       return 0;
-}
-
-static ssize_t pnx833x_wdt_write(struct file *file, const char *data, size_t len, loff_t *ppos)
-{
-       /* Refresh the timer. */
-       if (len)
-               pnx833x_wdt_ping();
-
-       return len;
-}
-
-static long pnx833x_wdt_ioctl(struct file *file, unsigned int cmd,
-                                                       unsigned long arg)
-{
-       int options, new_timeout = 0;
-       uint32_t timeout, timeout_left = 0;
-
-       static const struct watchdog_info ident = {
-               .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
-               .firmware_version = 0,
-               .identity = "Hardware Watchdog for PNX833x",
-       };
-
-       switch (cmd) {
-       default:
-               return -ENOTTY;
-
-       case WDIOC_GETSUPPORT:
-               if (copy_to_user((struct watchdog_info *)arg,
-                                &ident, sizeof(ident)))
-                       return -EFAULT;
-               return 0;
-
-       case WDIOC_GETSTATUS:
-       case WDIOC_GETBOOTSTATUS:
-               return put_user(0, (int *)arg);
-
-       case WDIOC_SETOPTIONS:
-               if (get_user(options, (int *)arg))
-                       return -EFAULT;
-
-               if (options & WDIOS_DISABLECARD)
-                       pnx833x_wdt_stop();
-
-               if (options & WDIOS_ENABLECARD)
-                       pnx833x_wdt_start();
-
-               return 0;
-
-       case WDIOC_KEEPALIVE:
-               pnx833x_wdt_ping();
-               return 0;
-
-       case WDIOC_SETTIMEOUT:
-       {
-               if (get_user(new_timeout, (int *)arg))
-                       return -EFAULT;
-
-               pnx833x_wdt_timeout = new_timeout;
-               PNX833X_REG(PNX833X_CONFIG +
-                       PNX833X_CONFIG_CPU_WATCHDOG_COMPARE) = new_timeout;
-               return put_user(new_timeout, (int *)arg);
-       }
-
-       case WDIOC_GETTIMEOUT:
-               timeout = PNX833X_REG(PNX833X_CONFIG +
-                                       PNX833X_CONFIG_CPU_WATCHDOG_COMPARE);
-               return put_user(timeout, (int *)arg);
-
-       case WDIOC_GETTIMELEFT:
-               timeout_left = PNX833X_REG(PNX833X_CONFIG +
-                                               PNX833X_CONFIG_CPU_WATCHDOG);
-               return put_user(timeout_left, (int *)arg);
-
-       }
-}
-
-static int pnx833x_wdt_notify_sys(struct notifier_block *this,
-                                       unsigned long code, void *unused)
-{
-       if (code == SYS_DOWN || code == SYS_HALT)
-               pnx833x_wdt_stop(); /* Turn the WDT off */
-
-       return NOTIFY_DONE;
-}
-
-static const struct file_operations pnx833x_wdt_fops = {
-       .owner          = THIS_MODULE,
-       .llseek         = no_llseek,
-       .write          = pnx833x_wdt_write,
-       .unlocked_ioctl = pnx833x_wdt_ioctl,
-       .compat_ioctl   = compat_ptr_ioctl,
-       .open           = pnx833x_wdt_open,
-       .release        = pnx833x_wdt_release,
-};
-
-static struct miscdevice pnx833x_wdt_miscdev = {
-       .minor          = WATCHDOG_MINOR,
-       .name           = "watchdog",
-       .fops           = &pnx833x_wdt_fops,
-};
-
-static struct notifier_block pnx833x_wdt_notifier = {
-       .notifier_call = pnx833x_wdt_notify_sys,
-};
-
-static int __init watchdog_init(void)
-{
-       int ret, cause;
-
-       /* Lets check the reason for the reset.*/
-       cause = PNX833X_REG(PNX833X_RESET);
-       /*If bit 31 is set then watchdog was cause of reset.*/
-       if (cause & 0x80000000) {
-               pr_info("The system was previously reset due to the watchdog firing - please investigate...\n");
-       }
-
-       ret = register_reboot_notifier(&pnx833x_wdt_notifier);
-       if (ret) {
-               pr_err("cannot register reboot notifier (err=%d)\n", ret);
-               return ret;
-       }
-
-       ret = misc_register(&pnx833x_wdt_miscdev);
-       if (ret) {
-               pr_err("cannot register miscdev on minor=%d (err=%d)\n",
-                      WATCHDOG_MINOR, ret);
-               unregister_reboot_notifier(&pnx833x_wdt_notifier);
-               return ret;
-       }
-
-       pr_info("Hardware Watchdog Timer for PNX833x: Version 0.1\n");
-
-       if (start_enabled)
-               pnx833x_wdt_start();
-
-       return 0;
-}
-
-static void __exit watchdog_exit(void)
-{
-       misc_deregister(&pnx833x_wdt_miscdev);
-       unregister_reboot_notifier(&pnx833x_wdt_notifier);
-}
-
-module_init(watchdog_init);
-module_exit(watchdog_exit);
-
-MODULE_AUTHOR("Daniel Laird/Andre McCurdy");
-MODULE_DESCRIPTION("Hardware Watchdog Device for PNX833x");
-MODULE_LICENSE("GPL");
index ab7465d..7cf0f2e 100644
@@ -148,10 +148,17 @@ static int qcom_wdt_restart(struct watchdog_device *wdd, unsigned long action,
         */
        wmb();
 
-       msleep(150);
+       mdelay(150);
        return 0;
 }
 
+static int qcom_wdt_is_running(struct watchdog_device *wdd)
+{
+       struct qcom_wdt *wdt = to_qcom_wdt(wdd);
+
+       return (readl(wdt_addr(wdt, WDT_EN)) & QCOM_WDT_ENABLE);
+}
+
 static const struct watchdog_ops qcom_wdt_ops = {
        .start          = qcom_wdt_start,
        .stop           = qcom_wdt_stop,
@@ -294,6 +301,17 @@ static int qcom_wdt_probe(struct platform_device *pdev)
        wdt->wdd.timeout = min(wdt->wdd.max_timeout, 30U);
        watchdog_init_timeout(&wdt->wdd, 0, dev);
 
+       /*
+        * If WDT is already running, call WDT start which
+        * will stop the WDT, set timeouts as bootloader
+        * might use different ones and set running bit
+        * to inform the WDT subsystem to ping the WDT
+        */
+       if (qcom_wdt_is_running(&wdt->wdd)) {
+               qcom_wdt_start(&wdt->wdd);
+               set_bit(WDOG_HW_RUNNING, &wdt->wdd.status);
+       }
+
        ret = devm_watchdog_register_device(dev, &wdt->wdd);
        if (ret)
                return ret;
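
The probe-time check above is the standard bootloader-handover pattern: if the enable bit says the watchdog is already ticking, restart it with the driver's own timeout and raise WDOG_HW_RUNNING so the watchdog core keeps it fed until user space opens the device. A condensed sketch, with a hypothetical register layout standing in for the Qualcomm one:

#include <linux/bitops.h>
#include <linux/bits.h>
#include <linux/io.h>
#include <linux/watchdog.h>

#define DEMO_WDT_EN     0x08            /* hypothetical enable register */
#define DEMO_WDT_ENABLE BIT(0)

static void demo_wdt_adopt(struct watchdog_device *wdd, void __iomem *base)
{
        if (!(readl(base + DEMO_WDT_EN) & DEMO_WDT_ENABLE))
                return;

        /* The bootloader's timeout may differ: reprogram with ours. */
        wdd->ops->start(wdd);

        /* Tell the core to ping the hardware until user space attaches. */
        set_bit(WDOG_HW_RUNNING, &wdd->status);
}
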
index 836319c..359302f 100644
@@ -227,8 +227,10 @@ static int rti_wdt_probe(struct platform_device *pdev)
 
        pm_runtime_enable(dev);
        ret = pm_runtime_get_sync(dev);
-       if (ret)
+       if (ret) {
+               pm_runtime_put_noidle(dev);
                return dev_err_probe(dev, ret, "runtime pm failed\n");
+       }
 
        platform_set_drvdata(pdev, wdt);
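
The rti_wdt fix applies the usual pm_runtime_get_sync() rule: the usage counter is incremented even when the call fails, so every error path owes a pm_runtime_put_noidle(). A sketch of the idiom; since v5.10 the combined helper pm_runtime_resume_and_get() avoids the trap entirely:

#include <linux/device.h>
#include <linux/pm_runtime.h>

static int demo_do_powered_work(struct device *dev)
{
        int ret;

        ret = pm_runtime_get_sync(dev);
        if (ret < 0) {
                /* get_sync bumped the usage count even on failure. */
                pm_runtime_put_noidle(dev);
                return ret;
        }

        /* ... device is now powered; do the actual work here ... */

        pm_runtime_put(dev);
        return 0;
}
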
 
index 04483d6..13db71e 100644
@@ -78,7 +78,7 @@ static int fitpc2_wdt_open(struct inode *inode, struct file *file)
        return stream_open(inode, file);
 }
 
-static ssize_t fitpc2_wdt_write(struct file *file, const char *data,
+static ssize_t fitpc2_wdt_write(struct file *file, const char __user *data,
                                                size_t len, loff_t *ppos)
 {
        size_t i;
@@ -125,16 +125,16 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd,
 
        switch (cmd) {
        case WDIOC_GETSUPPORT:
-               ret = copy_to_user((struct watchdog_info *)arg, &ident,
+               ret = copy_to_user((struct watchdog_info __user *)arg, &ident,
                                   sizeof(ident)) ? -EFAULT : 0;
                break;
 
        case WDIOC_GETSTATUS:
-               ret = put_user(0, (int *)arg);
+               ret = put_user(0, (int __user *)arg);
                break;
 
        case WDIOC_GETBOOTSTATUS:
-               ret = put_user(0, (int *)arg);
+               ret = put_user(0, (int __user *)arg);
                break;
 
        case WDIOC_KEEPALIVE:
@@ -143,7 +143,7 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd,
                break;
 
        case WDIOC_SETTIMEOUT:
-               ret = get_user(time, (int *)arg);
+               ret = get_user(time, (int __user *)arg);
                if (ret)
                        break;
 
@@ -157,7 +157,7 @@ static long fitpc2_wdt_ioctl(struct file *file, unsigned int cmd,
                fallthrough;
 
        case WDIOC_GETTIMEOUT:
-               ret = put_user(margin, (int *)arg);
+               ret = put_user(margin, (int __user *)arg);
                break;
        }
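
The annotations added above cost nothing at runtime; they exist so sparse can prove that user-space pointers only ever flow into the uaccess helpers and are never dereferenced directly. The shape of an annotated handler (hypothetical function, real helpers):

#include <linux/uaccess.h>

static long demo_ioctl_getstatus(unsigned long arg)
{
        int __user *p = (int __user *)arg;      /* tagged as untrusted */
        int status = 0;

        /* sparse rejects '*p = status'; only accessors may touch it. */
        return put_user(status, p);
}
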
 
index 190d26e..958dc32 100644
@@ -291,6 +291,7 @@ sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id)
                set_bit(WDOG_HW_RUNNING, &wdt->wdd.status);
        }
 
+       watchdog_stop_on_reboot(&wdt->wdd);
        ret = watchdog_register_device(&wdt->wdd);
        if (ret)
                goto err;
index 65cb55f..4e689b6 100644
@@ -6,6 +6,7 @@
 
 #include <linux/bitops.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -53,7 +54,7 @@
 
 #define SPRD_WDT_CNT_HIGH_SHIFT                16
 #define SPRD_WDT_LOW_VALUE_MASK                GENMASK(15, 0)
-#define SPRD_WDT_LOAD_TIMEOUT          1000
+#define SPRD_WDT_LOAD_TIMEOUT          11
 
 struct sprd_wdt {
        void __iomem *base;
@@ -108,6 +109,23 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 timeout,
        u32 tmr_step = timeout * SPRD_WDT_CNT_STEP;
        u32 prtmr_step = pretimeout * SPRD_WDT_CNT_STEP;
 
+       /*
+        * Checking busy bit to make sure the previous loading operation is
+        * done. According to the specification, the busy bit would be set
+        * after a new loading operation and last 2 or 3 RTC clock
+        * cycles (about 60us~92us).
+        */
+       do {
+               val = readl_relaxed(wdt->base + SPRD_WDT_INT_RAW);
+               if (!(val & SPRD_WDT_LD_BUSY_BIT))
+                       break;
+
+               usleep_range(10, 100);
+       } while (delay_cnt++ < SPRD_WDT_LOAD_TIMEOUT);
+
+       if (delay_cnt >= SPRD_WDT_LOAD_TIMEOUT)
+               return -EBUSY;
+
        sprd_wdt_unlock(wdt->base);
        writel_relaxed((tmr_step >> SPRD_WDT_CNT_HIGH_SHIFT) &
                      SPRD_WDT_LOW_VALUE_MASK, wdt->base + SPRD_WDT_LOAD_HIGH);
@@ -120,20 +138,6 @@ static int sprd_wdt_load_value(struct sprd_wdt *wdt, u32 timeout,
                       wdt->base + SPRD_WDT_IRQ_LOAD_LOW);
        sprd_wdt_lock(wdt->base);
 
-       /*
-        * Waiting the load value operation done,
-        * it needs two or three RTC clock cycles.
-        */
-       do {
-               val = readl_relaxed(wdt->base + SPRD_WDT_INT_RAW);
-               if (!(val & SPRD_WDT_LD_BUSY_BIT))
-                       break;
-
-               cpu_relax();
-       } while (delay_cnt++ < SPRD_WDT_LOAD_TIMEOUT);
-
-       if (delay_cnt >= SPRD_WDT_LOAD_TIMEOUT)
-               return -EBUSY;
        return 0;
 }
 
@@ -345,15 +349,10 @@ static int __maybe_unused sprd_wdt_pm_resume(struct device *dev)
        if (ret)
                return ret;
 
-       if (watchdog_active(&wdt->wdd)) {
+       if (watchdog_active(&wdt->wdd))
                ret = sprd_wdt_start(&wdt->wdd);
-               if (ret) {
-                       sprd_wdt_disable(wdt);
-                       return ret;
-               }
-       }
 
-       return 0;
+       return ret;
 }
 
 static const struct dev_pm_ops sprd_wdt_pm_ops = {
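
The sprd change turns the busy-wait around: the load-busy bit only matters before the next load, so the driver now sleeps-and-polls up front (11 tries of up to ~100us covers the documented 60-92us window) instead of spinning with cpu_relax() afterwards. The same wait is what readl_poll_timeout() packages up; a sketch with a hypothetical register map:

#include <linux/bits.h>
#include <linux/iopoll.h>

#define DEMO_INT_RAW    0x10            /* hypothetical status register */
#define DEMO_LD_BUSY    BIT(4)

static int demo_wait_load_idle(void __iomem *base)
{
        u32 val;

        /* Poll roughly every 10us, give up after 1.1ms; sleeps between reads. */
        return readl_poll_timeout(base + DEMO_INT_RAW, val,
                                  !(val & DEMO_LD_BUSY), 10, 1100);
}
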
index 25188d6..a3436c2 100644
@@ -162,18 +162,15 @@ static int stm32_iwdg_clk_init(struct platform_device *pdev,
        u32 ret;
 
        wdt->clk_lsi = devm_clk_get(dev, "lsi");
-       if (IS_ERR(wdt->clk_lsi)) {
-               dev_err(dev, "Unable to get lsi clock\n");
-               return PTR_ERR(wdt->clk_lsi);
-       }
+       if (IS_ERR(wdt->clk_lsi))
+               return dev_err_probe(dev, PTR_ERR(wdt->clk_lsi), "Unable to get lsi clock\n");
 
        /* optional peripheral clock */
        if (wdt->data->has_pclk) {
                wdt->clk_pclk = devm_clk_get(dev, "pclk");
-               if (IS_ERR(wdt->clk_pclk)) {
-                       dev_err(dev, "Unable to get pclk clock\n");
-                       return PTR_ERR(wdt->clk_pclk);
-               }
+               if (IS_ERR(wdt->clk_pclk))
+                       return dev_err_probe(dev, PTR_ERR(wdt->clk_pclk),
+                                            "Unable to get pclk clock\n");
 
                ret = clk_prepare_enable(wdt->clk_pclk);
                if (ret) {
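
dev_err_probe(), adopted above, folds "log an error, return the code" into one call and deliberately stays quiet for -EPROBE_DEFER, recording the deferral reason for the devices_deferred debugfs file instead of spamming the log on every deferral. The general conversion:

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

static int demo_get_lsi_clock(struct device *dev, struct clk **clk)
{
        *clk = devm_clk_get(dev, "lsi");
        if (IS_ERR(*clk))
                /* Silent for -EPROBE_DEFER, dev_err() otherwise. */
                return dev_err_probe(dev, PTR_ERR(*clk),
                                     "Unable to get lsi clock\n");
        return 0;
}
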
index 4238447..0e9a995 100644
@@ -267,15 +267,19 @@ static int __watchdog_register_device(struct watchdog_device *wdd)
        }
 
        if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) {
-               wdd->reboot_nb.notifier_call = watchdog_reboot_notifier;
-
-               ret = register_reboot_notifier(&wdd->reboot_nb);
-               if (ret) {
-                       pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
-                              wdd->id, ret);
-                       watchdog_dev_unregister(wdd);
-                       ida_simple_remove(&watchdog_ida, id);
-                       return ret;
+               if (!wdd->ops->stop)
+                       pr_warn("watchdog%d: stop_on_reboot not supported\n", wdd->id);
+               else {
+                       wdd->reboot_nb.notifier_call = watchdog_reboot_notifier;
+
+                       ret = register_reboot_notifier(&wdd->reboot_nb);
+                       if (ret) {
+                               pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
+                                       wdd->id, ret);
+                               watchdog_dev_unregister(wdd);
+                               ida_simple_remove(&watchdog_ida, id);
+                               return ret;
+                       }
                }
        }
 
index 3065dd6..cec7917 100644
@@ -34,9 +34,9 @@ struct wdat_instruction {
  * @period: How long is one watchdog period in ms
  * @stopped_in_sleep: Is this watchdog stopped by the firmware in S1-S5
  * @stopped: Was the watchdog stopped by the driver in suspend
- * @actions: An array of instruction lists indexed by an action number from
- *           the WDAT table. There can be %NULL entries for not implemented
- *           actions.
+ * @instructions: An array of instruction lists indexed by an action number from
+ *                the WDAT table. There can be %NULL entries for not implemented
+ *                actions.
  */
 struct wdat_wdt {
        struct platform_device *pdev;
index babdca8..c3621b9 100644
@@ -21,7 +21,7 @@ obj-$(CONFIG_XEN_GNTDEV)              += xen-gntdev.o
 obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)      += xen-gntalloc.o
 obj-$(CONFIG_XENFS)                    += xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)       += sys-hypervisor.o
-obj-$(CONFIG_XEN_PVHVM)                        += platform-pci.o
+obj-$(CONFIG_XEN_PVHVM_GUEST)          += platform-pci.o
 obj-$(CONFIG_SWIOTLB_XEN)              += swiotlb-xen.o
 obj-$(CONFIG_XEN_MCE_LOG)              += mcelog.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)       += xen-pciback/
index cd04668..374d36d 100644
@@ -179,6 +179,7 @@ static int poweroff_nb(struct notifier_block *cb, unsigned long code, void *unus
        case SYS_HALT:
        case SYS_POWER_OFF:
                shutting_down = SHUTDOWN_POWEROFF;
+               break;
        default:
                break;
        }
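
The added break is harmless to omit today (the fall-through lands on "default: break;"), but it silences -Wimplicit-fallthrough and removes a trap for whoever inserts a case later. In miniature:

#include <stdio.h>

enum sys_code { SYS_HALT, SYS_POWER_OFF, SYS_RESTART };

static int shutting_down;

static void demo_notifier(enum sys_code code)
{
        switch (code) {
        case SYS_HALT:
        case SYS_POWER_OFF:
                shutting_down = 1;
                break;  /* without this, control falls into whatever
                           case gets inserted below later */
        default:
                break;
        }
}

int main(void)
{
        demo_notifier(SYS_POWER_OFF);
        printf("shutting_down = %d\n", shutting_down);
        return 0;
}
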
index 3d681a2..9d9de62 100644
@@ -39,6 +39,48 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
 }
 
 /**
+ * v9fs_fid_find_inode - search the inode's list of open fids
+ * @inode: inode whose fid list is searched
+ * @uid: uid that the returned fid must belong to
+ *
+ */
+
+static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
+{
+       struct hlist_head *h;
+       struct p9_fid *fid, *ret = NULL;
+
+       p9_debug(P9_DEBUG_VFS, " inode: %p\n", inode);
+
+       spin_lock(&inode->i_lock);
+       h = (struct hlist_head *)&inode->i_private;
+       hlist_for_each_entry(fid, h, ilist) {
+               if (uid_eq(fid->uid, uid)) {
+                       refcount_inc(&fid->count);
+                       ret = fid;
+                       break;
+               }
+       }
+       spin_unlock(&inode->i_lock);
+       return ret;
+}
+
+/**
+ * v9fs_open_fid_add - add an open fid to an inode
+ * @inode: inode that the fid is being added to
+ * @fid: fid to add
+ *
+ */
+
+void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid)
+{
+       spin_lock(&inode->i_lock);
+       hlist_add_head(&fid->ilist, (struct hlist_head *)&inode->i_private);
+       spin_unlock(&inode->i_lock);
+}
+
+
+/**
  * v9fs_fid_find - retrieve a fid that belongs to the specified uid
  * @dentry: dentry to look for fid in
  * @uid: return fid that belongs to the specified user
@@ -54,13 +96,18 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
                 dentry, dentry, from_kuid(&init_user_ns, uid),
                 any);
        ret = NULL;
+
+       if (d_inode(dentry))
+               ret = v9fs_fid_find_inode(d_inode(dentry), uid);
+
        /* we'll recheck under lock if there's anything to look in */
-       if (dentry->d_fsdata) {
+       if (!ret && dentry->d_fsdata) {
                struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata;
                spin_lock(&dentry->d_lock);
                hlist_for_each_entry(fid, h, dlist) {
                        if (any || uid_eq(fid->uid, uid)) {
                                ret = fid;
+                               refcount_inc(&ret->count);
                                break;
                        }
                }
@@ -122,7 +169,10 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
        fid = v9fs_fid_find(ds, uid, any);
        if (fid) {
                /* Found the parent fid do a lookup with that */
-               fid = p9_client_walk(fid, 1, &dentry->d_name.name, 1);
+               struct p9_fid *ofid = fid;
+
+               fid = p9_client_walk(ofid, 1, &dentry->d_name.name, 1);
+               p9_client_clunk(ofid);
                goto fid_out;
        }
        up_read(&v9ses->rename_sem);
@@ -147,8 +197,10 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
                v9fs_fid_add(dentry->d_sb->s_root, fid);
        }
        /* If we are root ourself just return that */
-       if (dentry->d_sb->s_root == dentry)
+       if (dentry->d_sb->s_root == dentry) {
+               refcount_inc(&fid->count);
                return fid;
+       }
        /*
         * Do a multipath walk with attached root.
         * When walking parent we need to make sure we
@@ -195,6 +247,7 @@ fid_out:
                        fid = ERR_PTR(-ENOENT);
                } else {
                        __add_fid(dentry, fid);
+                       refcount_inc(&fid->count);
                        spin_unlock(&dentry->d_lock);
                }
        }
@@ -245,11 +298,13 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
 {
        int err;
-       struct p9_fid *fid;
+       struct p9_fid *fid, *ofid;
 
-       fid = clone_fid(v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0));
+       ofid = v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0);
+       fid = clone_fid(ofid);
        if (IS_ERR(fid))
                goto error_out;
+       p9_client_clunk(ofid);
        /*
         * writeback fid will only be used to write back the
         * dirty pages. We always request for the open fid in read-write
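
Nearly every 9p hunk in this series applies one rule: fid lookups now return a counted reference, so the caller must p9_client_clunk() the fid when done, on success and failure alike. The shape of a converted caller; the function itself is invented for illustration, while v9fs_fid_lookup() and the p9 client calls are the real APIs used throughout these diffs:

#include <linux/err.h>
#include <linux/slab.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include "fid.h"                /* fs/9p-local: v9fs_fid_lookup() */

static int demo_stat_dentry(struct dentry *dentry)
{
        struct p9_fid *fid;
        struct p9_wstat *st;

        fid = v9fs_fid_lookup(dentry);  /* takes a reference now */
        if (IS_ERR(fid))
                return PTR_ERR(fid);

        st = p9_client_stat(fid);
        p9_client_clunk(fid);           /* drop it on every path */
        if (IS_ERR(st))
                return PTR_ERR(st);

        p9stat_free(st);
        kfree(st);
        return 0;
}
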
index 928b109..f7f3350 100644
@@ -15,12 +15,21 @@ static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry)
 }
 void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
 struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
+void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid);
 static inline struct p9_fid *clone_fid(struct p9_fid *fid)
 {
        return IS_ERR(fid) ? fid :  p9_client_walk(fid, 0, NULL, 1);
 }
 static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
 {
-       return clone_fid(v9fs_fid_lookup(dentry));
+       struct p9_fid *fid, *nfid;
+
+       fid = v9fs_fid_lookup(dentry);
+       if (!fid || IS_ERR(fid))
+               return fid;
+
+       nfid = clone_fid(fid);
+       p9_client_clunk(fid);
+       return nfid;
 }
 #endif
index 7d6f69a..4b42921 100644
@@ -85,6 +85,8 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
                        retval = v9fs_refresh_inode_dotl(fid, inode);
                else
                        retval = v9fs_refresh_inode(fid, inode);
+               p9_client_clunk(fid);
+
                if (retval == -ENOENT)
                        return 0;
                if (retval < 0)
index 674d22b..b6a5a0b 100644
@@ -210,8 +210,12 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
        fid = filp->private_data;
        p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
                 inode, filp, fid ? fid->fid : -1);
-       if (fid)
+       if (fid) {
+               spin_lock(&inode->i_lock);
+               hlist_del(&fid->ilist);
+               spin_unlock(&inode->i_lock);
                p9_client_clunk(fid);
+       }
        return 0;
 }
 
index be57689..649f04f 100644
@@ -46,7 +46,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
        int err;
        struct v9fs_inode *v9inode;
        struct v9fs_session_info *v9ses;
-       struct p9_fid *fid;
+       struct p9_fid *fid, *writeback_fid;
        int omode;
 
        p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
@@ -85,17 +85,18 @@ int v9fs_file_open(struct inode *inode, struct file *file)
                 * because we want write after unlink usecase
                 * to work.
                 */
-               fid = v9fs_writeback_fid(file_dentry(file));
+               writeback_fid = v9fs_writeback_fid(file_dentry(file));
-               if (IS_ERR(fid)) {
-                       err = PTR_ERR(fid);
+               if (IS_ERR(writeback_fid)) {
+                       err = PTR_ERR(writeback_fid);
                        mutex_unlock(&v9inode->v_mutex);
                        goto out_error;
                }
-               v9inode->writeback_fid = (void *) fid;
+               v9inode->writeback_fid = (void *) writeback_fid;
        }
        mutex_unlock(&v9inode->v_mutex);
        if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
                v9fs_cache_inode_set_cookie(inode, file);
+       v9fs_open_fid_add(inode, fid);
        return 0;
 out_error:
        p9_client_clunk(file->private_data);
index ae0c38a..4a937fa 100644
@@ -256,6 +256,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
        inode->i_rdev = rdev;
        inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
        inode->i_mapping->a_ops = &v9fs_addr_operations;
+       inode->i_private = NULL;
 
        switch (mode & S_IFMT) {
        case S_IFIFO:
@@ -550,6 +551,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
        if (v9fs_proto_dotl(v9ses))
                retval = p9_client_unlinkat(dfid, dentry->d_name.name,
                                            v9fs_at_to_dotl_flags(flags));
+       p9_client_clunk(dfid);
        if (retval == -EOPNOTSUPP) {
                /* Try the one based on path */
                v9fid = v9fs_fid_clone(dentry);
@@ -570,6 +572,10 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
 
                v9fs_invalidate_inode_attr(inode);
                v9fs_invalidate_inode_attr(dir);
+
+               /* invalidate all fids associated with dentry */
+               /* NOTE: This will not include open fids */
+               dentry->d_op->d_release(dentry);
        }
        return retval;
 }
@@ -590,14 +596,12 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 {
        int err;
        const unsigned char *name;
-       struct p9_fid *dfid, *ofid, *fid;
+       struct p9_fid *dfid, *ofid = NULL, *fid = NULL;
        struct inode *inode;
 
        p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
 
        err = 0;
-       ofid = NULL;
-       fid = NULL;
        name = dentry->d_name.name;
        dfid = v9fs_parent_fid(dentry);
        if (IS_ERR(dfid)) {
@@ -611,12 +615,14 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
        if (IS_ERR(ofid)) {
                err = PTR_ERR(ofid);
                p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+               p9_client_clunk(dfid);
                return ERR_PTR(err);
        }
 
        err = p9_client_fcreate(ofid, name, perm, mode, extension);
        if (err < 0) {
                p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
+               p9_client_clunk(dfid);
                goto error;
        }
 
@@ -628,6 +634,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
                        p9_debug(P9_DEBUG_VFS,
                                   "p9_client_walk failed %d\n", err);
                        fid = NULL;
+                       p9_client_clunk(dfid);
                        goto error;
                }
                /*
@@ -638,11 +645,13 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
                        err = PTR_ERR(inode);
                        p9_debug(P9_DEBUG_VFS,
                                   "inode creation failed %d\n", err);
+                       p9_client_clunk(dfid);
                        goto error;
                }
                v9fs_fid_add(dentry, fid);
                d_instantiate(dentry, inode);
        }
+       p9_client_clunk(dfid);
        return ofid;
 error:
        if (ofid)
@@ -755,6 +764,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
         */
        name = dentry->d_name.name;
        fid = p9_client_walk(dfid, 1, &name, 1);
+       p9_client_clunk(dfid);
        if (fid == ERR_PTR(-ENOENT))
                inode = NULL;
        else if (IS_ERR(fid))
@@ -792,6 +802,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
        struct v9fs_session_info *v9ses;
        struct p9_fid *fid, *inode_fid;
        struct dentry *res = NULL;
+       struct inode *inode;
 
        if (d_in_lookup(dentry)) {
                res = v9fs_vfs_lookup(dir, dentry, 0);
@@ -820,7 +831,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
        }
 
        v9fs_invalidate_inode_attr(dir);
-       v9inode = V9FS_I(d_inode(dentry));
+       inode = d_inode(dentry);
+       v9inode = V9FS_I(inode);
        mutex_lock(&v9inode->v_mutex);
        if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
            !v9inode->writeback_fid &&
@@ -848,6 +860,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
        file->private_data = fid;
        if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
                v9fs_cache_inode_set_cookie(d_inode(dentry), file);
+       v9fs_open_fid_add(inode, fid);
 
        file->f_mode |= FMODE_CREATED;
 out:
@@ -902,7 +915,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct inode *old_inode;
        struct inode *new_inode;
        struct v9fs_session_info *v9ses;
-       struct p9_fid *oldfid;
+       struct p9_fid *oldfid, *dfid;
        struct p9_fid *olddirfid;
        struct p9_fid *newdirfid;
        struct p9_wstat wstat;
@@ -919,13 +932,20 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (IS_ERR(oldfid))
                return PTR_ERR(oldfid);
 
-       olddirfid = clone_fid(v9fs_parent_fid(old_dentry));
+       dfid = v9fs_parent_fid(old_dentry);
+       olddirfid = clone_fid(dfid);
+       if (dfid && !IS_ERR(dfid))
+               p9_client_clunk(dfid);
+
        if (IS_ERR(olddirfid)) {
                retval = PTR_ERR(olddirfid);
                goto done;
        }
 
-       newdirfid = clone_fid(v9fs_parent_fid(new_dentry));
+       dfid = v9fs_parent_fid(new_dentry);
+       newdirfid = clone_fid(dfid);
+       p9_client_clunk(dfid);
+
        if (IS_ERR(newdirfid)) {
                retval = PTR_ERR(newdirfid);
                goto clunk_olddir;
@@ -982,6 +1002,7 @@ clunk_olddir:
        p9_client_clunk(olddirfid);
 
 done:
+       p9_client_clunk(oldfid);
        return retval;
 }
 
@@ -1014,6 +1035,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
                return PTR_ERR(fid);
 
        st = p9_client_stat(fid);
+       p9_client_clunk(fid);
        if (IS_ERR(st))
                return PTR_ERR(st);
 
@@ -1034,7 +1056,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
 
 static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
-       int retval;
+       int retval, use_dentry = 0;
        struct v9fs_session_info *v9ses;
        struct p9_fid *fid = NULL;
        struct p9_wstat wstat;
@@ -1050,8 +1072,10 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
                fid = iattr->ia_file->private_data;
                WARN_ON(!fid);
        }
-       if (!fid)
+       if (!fid) {
                fid = v9fs_fid_lookup(dentry);
+               use_dentry = 1;
+       }
        if(IS_ERR(fid))
                return PTR_ERR(fid);
 
@@ -1081,6 +1105,10 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
                filemap_write_and_wait(d_inode(dentry)->i_mapping);
 
        retval = p9_client_wstat(fid, &wstat);
+
+       if (use_dentry)
+               p9_client_clunk(fid);
+
        if (retval < 0)
                return retval;
 
@@ -1205,6 +1233,7 @@ static const char *v9fs_vfs_get_link(struct dentry *dentry,
                return ERR_PTR(-EBADF);
 
        st = p9_client_stat(fid);
+       p9_client_clunk(fid);
        if (IS_ERR(st))
                return ERR_CAST(st);
 
index 0028ecc..823c2eb 100644
@@ -296,6 +296,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 
        /* instantiate inode and assign the unopened fid to the dentry */
        fid = p9_client_walk(dfid, 1, &name, 1);
+       p9_client_clunk(dfid);
        if (IS_ERR(fid)) {
                err = PTR_ERR(fid);
                p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
@@ -342,6 +343,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
        file->private_data = ofid;
        if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
                v9fs_cache_inode_set_cookie(inode, file);
+       v9fs_open_fid_add(inode, ofid);
        file->f_mode |= FMODE_CREATED;
 out:
        v9fs_put_acl(dacl, pacl);
@@ -407,7 +409,6 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
        err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
        if (err < 0)
                goto error;
-
        fid = p9_client_walk(dfid, 1, &name, 1);
        if (IS_ERR(fid)) {
                err = PTR_ERR(fid);
@@ -451,6 +452,7 @@ error:
        if (fid)
                p9_client_clunk(fid);
        v9fs_put_acl(dacl, pacl);
+       p9_client_clunk(dfid);
        return err;
 }
 
@@ -478,6 +480,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
         */
 
        st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+       p9_client_clunk(fid);
        if (IS_ERR(st))
                return PTR_ERR(st);
 
@@ -539,7 +542,7 @@ static int v9fs_mapped_iattr_valid(int iattr_valid)
 
 int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 {
-       int retval;
+       int retval, use_dentry = 0;
        struct p9_fid *fid = NULL;
        struct p9_iattr_dotl p9attr;
        struct inode *inode = d_inode(dentry);
@@ -564,8 +567,10 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
                fid = iattr->ia_file->private_data;
                WARN_ON(!fid);
        }
-       if (!fid)
+       if (!fid) {
                fid = v9fs_fid_lookup(dentry);
+               use_dentry = 1;
+       }
        if (IS_ERR(fid))
                return PTR_ERR(fid);
 
@@ -574,8 +579,11 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
                filemap_write_and_wait(inode->i_mapping);
 
        retval = p9_client_setattr(fid, &p9attr);
-       if (retval < 0)
+       if (retval < 0) {
+               if (use_dentry)
+                       p9_client_clunk(fid);
                return retval;
+       }
 
        if ((iattr->ia_valid & ATTR_SIZE) &&
            iattr->ia_size != i_size_read(inode))
@@ -587,9 +595,15 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
        if (iattr->ia_valid & ATTR_MODE) {
                /* We also want to update ACL when we update mode bits */
                retval = v9fs_acl_chmod(inode, fid);
-               if (retval < 0)
+               if (retval < 0) {
+                       if (use_dentry)
+                               p9_client_clunk(fid);
                        return retval;
+               }
        }
+       if (use_dentry)
+               p9_client_clunk(fid);
+
        return 0;
 }
 
@@ -741,6 +755,7 @@ error:
        if (fid)
                p9_client_clunk(fid);
 
+       p9_client_clunk(dfid);
        return err;
 }
 
@@ -769,11 +784,15 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
                return PTR_ERR(dfid);
 
        oldfid = v9fs_fid_lookup(old_dentry);
-       if (IS_ERR(oldfid))
+       if (IS_ERR(oldfid)) {
+               p9_client_clunk(dfid);
                return PTR_ERR(oldfid);
+       }
 
        err = p9_client_link(dfid, oldfid, dentry->d_name.name);
 
+       p9_client_clunk(dfid);
+       p9_client_clunk(oldfid);
        if (err < 0) {
                p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
                return err;
@@ -788,6 +807,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
                        return PTR_ERR(fid);
 
                v9fs_refresh_inode_dotl(fid, d_inode(old_dentry));
+               p9_client_clunk(fid);
        }
        ihold(d_inode(old_dentry));
        d_instantiate(dentry, d_inode(old_dentry));
@@ -886,6 +906,8 @@ error:
        if (fid)
                p9_client_clunk(fid);
        v9fs_put_acl(dacl, pacl);
+       p9_client_clunk(dfid);
+
        return err;
 }
 
@@ -914,6 +936,7 @@ v9fs_vfs_get_link_dotl(struct dentry *dentry,
        if (IS_ERR(fid))
                return ERR_CAST(fid);
        retval = p9_client_readlink(fid, &target);
+       p9_client_clunk(fid);
        if (retval)
                return ERR_PTR(retval);
        set_delayed_call(done, kfree_link, target);
index 9a21269..5fce6e3 100644
@@ -268,6 +268,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
        }
        res = simple_statfs(dentry, buf);
 done:
+       p9_client_clunk(fid);
        return res;
 }
 
index ac8ff8c..87217dd 100644
@@ -71,14 +71,17 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
                       void *buffer, size_t buffer_size)
 {
        struct p9_fid *fid;
+       int ret;
 
        p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n",
                 name, buffer_size);
        fid = v9fs_fid_lookup(dentry);
        if (IS_ERR(fid))
                return PTR_ERR(fid);
+       ret = v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
+       p9_client_clunk(fid);
 
-       return v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
+       return ret;
 }
 
 /*
@@ -96,8 +99,15 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
 int v9fs_xattr_set(struct dentry *dentry, const char *name,
                   const void *value, size_t value_len, int flags)
 {
-       struct p9_fid *fid = v9fs_fid_lookup(dentry);
-       return v9fs_fid_xattr_set(fid, name, value, value_len, flags);
+       int ret;
+       struct p9_fid *fid;
+
+       fid  = v9fs_fid_lookup(dentry);
+       if (IS_ERR(fid))
+               return PTR_ERR(fid);
+       ret = v9fs_fid_xattr_set(fid, name, value, value_len, flags);
+       p9_client_clunk(fid);
+       return ret;
 }
 
 int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
index 604f65f..fe03cbd 100644
@@ -60,9 +60,9 @@ config CIFS_STATS2
          Enabling this option will allow more detailed statistics on SMB
          request timing to be displayed in /proc/fs/cifs/DebugData and also
          allow optional logging of slow responses to dmesg (depending on the
-         value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
-         These additional statistics may have a minor effect on performance
-         and memory utilization.
+         value of /proc/fs/cifs/cifsFYI). See Documentation/admin-guide/cifs/usage.rst
+         for more details. These additional statistics may have a minor effect
+         on performance and memory utilization.
 
          Unless you are a developer or are doing network performance analysis
          or tuning, say N.
@@ -102,10 +102,10 @@ config CIFS_WEAK_PW_HASH
          is enabled in the kernel build, LANMAN authentication will not be
          used automatically. At runtime LANMAN mounts are disabled but
          can be set to required (or optional) either in
-         /proc/fs/cifs (see fs/cifs/README for more detail) or via an
-         option on the mount command. This support is disabled by
-         default in order to reduce the possibility of a downgrade
-         attack.
+         /proc/fs/cifs (see Documentation/admin-guide/cifs/usage.rst for
+         more detail) or via an option on the mount command. This support
+         is disabled by default in order to reduce the possibility of a
+         downgrade attack.
 
          If unsure, say N.
 
@@ -190,6 +190,17 @@ config CIFS_DFS_UPCALL
          servers if their addresses change or for implicit mounts of
          DFS junction points. If unsure, say Y.
 
+config CIFS_SWN_UPCALL
+       bool "SWN feature support"
+       depends on CIFS
+       help
+         The Service Witness Protocol (SWN) is used to get notifications
+         from a highly available server of resource state changes. This
+         feature enables an upcall mechanism for CIFS which contacts a
+         userspace daemon to establish the DCE/RPC connection to retrieve
+         the cluster's available interfaces and resource change notifications.
+         If unsure, say Y.
+
 config CIFS_NFSD_EXPORT
        bool "Allow nfsd to export CIFS file system"
        depends on CIFS && BROKEN
index cd17d0e..5213b20 100644 (file)
@@ -8,7 +8,7 @@ obj-$(CONFIG_CIFS) += cifs.o
 cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \
          inode.o link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
          cifs_unicode.o nterr.o cifsencrypt.o \
-         readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o \
+         readdir.o ioctl.o sess.o export.o smb1ops.o unc.o winucase.o \
          smb2ops.o smb2maperror.o smb2transport.o \
          smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o
 
@@ -18,6 +18,8 @@ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
 
 cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o dfs_cache.o
 
+cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o
+
 cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
 
 cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
index 0f2adec..488fe0f 100644 (file)
@@ -53,30 +53,6 @@ const struct fscache_cookie_def cifs_fscache_server_index_def = {
        .type = FSCACHE_COOKIE_TYPE_INDEX,
 };
 
-char *extract_sharename(const char *treename)
-{
-       const char *src;
-       char *delim, *dst;
-       int len;
-
-       /* skip double chars at the beginning */
-       src = treename + 2;
-
-       /* share name is always preceded by '\\' now */
-       delim = strchr(src, '\\');
-       if (!delim)
-               return ERR_PTR(-EINVAL);
-       delim++;
-       len = strlen(delim);
-
-       /* caller has to free the memory */
-       dst = kstrndup(delim, len, GFP_KERNEL);
-       if (!dst)
-               return ERR_PTR(-ENOMEM);
-
-       return dst;
-}
-
 static enum
 fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
                                              const void *data,
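The helper removed here does not disappear: extract_sharename() is still called by the new witness code added below (cifs_find_swn_reg() and cifs_get_swn_reg()), so it is presumably being relocated to a shared file rather than deleted. A standalone userspace model of its behavior, with strdup standing in for kstrndup:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Model of extract_sharename(): "\\server\share" -> "share" (caller frees). */
static char *extract_sharename(const char *treename)
{
        const char *src = treename + 2;       /* skip the leading "\\" */
        char *delim = strchr(src, '\\');

        if (!delim)
                return NULL;                  /* kernel code returns ERR_PTR(-EINVAL) */
        return strdup(delim + 1);             /* kstrndup in the kernel version */
}

int main(void)
{
        char *share = extract_sharename("\\\\server\\share");

        if (!share)
                return 1;
        puts(share);                          /* prints: share */
        free(share);
        return 0;
}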
index 53588d7..b231dcf 100644 (file)
@@ -23,6 +23,9 @@
 #ifdef CONFIG_CIFS_SMB_DIRECT
 #include "smbdirect.h"
 #endif
+#ifdef CONFIG_CIFS_SWN_UPCALL
+#include "cifs_swn.h"
+#endif
 
 void
 cifs_dump_mem(char *label, void *data, int length)
@@ -115,6 +118,10 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
                seq_printf(m, " POSIX Extensions");
        if (tcon->ses->server->ops->dump_share_caps)
                tcon->ses->server->ops->dump_share_caps(m, tcon);
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       if (tcon->use_witness)
+               seq_puts(m, " Witness");
+#endif
 
        if (tcon->need_reconnect)
                seq_puts(m, "\tDISCONNECTED ");
@@ -262,6 +269,9 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
        seq_printf(m, ",XATTR");
 #endif
        seq_printf(m, ",ACL");
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       seq_puts(m, ",WITNESS");
+#endif
        seq_putc(m, '\n');
        seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize);
        seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
@@ -462,6 +472,9 @@ skip_rdma:
        spin_unlock(&cifs_tcp_ses_lock);
        seq_putc(m, '\n');
 
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       cifs_swn_dump(m);
+#endif
        /* BB add code to dump additional info such as TCP session info now */
        return 0;
 }
index cc3ada1..e4c6ae4 100644 (file)
@@ -23,6 +23,7 @@
 #include "cifs_debug.h"
 #include "cifs_unicode.h"
 #include "dfs_cache.h"
+#include "fs_context.h"
 
 static LIST_HEAD(cifs_dfs_automount_list);
 
@@ -124,7 +125,6 @@ cifs_build_devname(char *nodename, const char *prepath)
  * @sb_mountdata:      parent/root DFS mount options (template)
  * @fullpath:          full path in UNC format
  * @ref:               optional server's referral
- * @devname:           optional pointer for saving device name
  *
  * creates mount options for submount based on template options sb_mountdata
  * and replacing unc,ip,prefixpath options with ones we've got form ref_unc.
@@ -134,8 +134,7 @@ cifs_build_devname(char *nodename, const char *prepath)
  */
 char *cifs_compose_mount_options(const char *sb_mountdata,
                                   const char *fullpath,
-                                  const struct dfs_info3_param *ref,
-                                  char **devname)
+                                  const struct dfs_info3_param *ref)
 {
        int rc;
        char *name;
@@ -232,10 +231,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
        strcat(mountdata, "ip=");
        strcat(mountdata, srvIP);
 
-       if (devname)
-               *devname = name;
-       else
-               kfree(name);
+       kfree(name);
 
        /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/
        /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/
@@ -258,6 +254,7 @@ compose_mount_options_err:
  * to perform failover in case we failed to connect to the first target in the
  * referral.
  *
+ * @mntpt:             directory entry for the path we are trying to automount
  * @cifs_sb:           parent/root superblock
  * @fullpath:          full path in UNC format
  */
@@ -275,9 +272,13 @@ static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt,
 
        convert_delimiter(devname, '/');
 
+       /* TODO: change to call fs_context_for_mount(), fill in context directly, call fc_mount */
+
+       /* See afs_mntpt_do_automount in fs/afs/mntpt.c for an example */
+
        /* strip first '\' from fullpath */
-       mountdata = cifs_compose_mount_options(cifs_sb->mountdata,
-                                              fullpath + 1, NULL, NULL);
+       mountdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
+                                              fullpath + 1, NULL);
        if (IS_ERR(mountdata)) {
                kfree(devname);
                return (struct vfsmount *)mountdata;
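cifs_dfs_do_mount() flips the DFS target's backslash delimiters to forward slashes before composing the submount options; the new cifs_show_devname() further down does the same when printing the device for /proc/mounts. A small standalone sketch of that conversion, as a simplified model of the kernel's convert_delimiter() helper:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified model of convert_delimiter(): flip path separators in place. */
static void convert_delimiter(char *path, char delim)
{
        char old = (delim == '/') ? '\\' : '/';

        for (; *path; path++)
                if (*path == old)
                        *path = delim;
}

int main(void)
{
        char *devname = strdup("\\\\server\\share\\dir");

        if (!devname)
                return 1;
        convert_delimiter(devname, '/');
        puts(devname);                        /* prints: //server/share/dir */
        free(devname);
        return 0;
}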
index 6e7c442..aa77edc 100644 (file)
@@ -61,19 +61,9 @@ struct cifs_sb_info {
        spinlock_t tlink_tree_lock;
        struct tcon_link *master_tlink;
        struct nls_table *local_nls;
-       unsigned int bsize;
-       unsigned int rsize;
-       unsigned int wsize;
-       unsigned long actimeo; /* attribute cache timeout (jiffies) */
+       struct smb3_fs_context *ctx;
        atomic_t active;
-       kuid_t  mnt_uid;
-       kgid_t  mnt_gid;
-       kuid_t  mnt_backupuid;
-       kgid_t  mnt_backupgid;
-       umode_t mnt_file_mode;
-       umode_t mnt_dir_mode;
        unsigned int mnt_cifs_flags;
-       char   *mountdata; /* options received at mount time or via DFS refs */
        struct delayed_work prune_tlinks;
        struct rcu_head rcu;
 
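The fields dropped from cifs_sb_info above (bsize/rsize/wsize, actimeo, the mnt_* ids and modes, mountdata) are not lost: they move into the per-mount struct smb3_fs_context reached through cifs_sb->ctx, as the later hunks show (cifs_sb->rsize becomes cifs_sb->ctx->rsize, mnt_uid becomes ctx->linux_uid, and so on). An illustrative sketch of the accessor change; the struct layouts here are reduced stand-ins, not the kernel definitions:

/* Reduced stand-ins for the kernel structs, just to show the indirection. */
struct smb3_fs_context {
        unsigned int rsize;                   /* was cifs_sb_info.rsize */
        unsigned int wsize;                   /* was cifs_sb_info.wsize */
};

struct cifs_sb_info {
        struct smb3_fs_context *ctx;          /* one pointer replaces the fields */
};

static unsigned int readahead_pages(const struct cifs_sb_info *sb,
                                    unsigned int page_size)
{
        /* was: sb->rsize / PAGE_SIZE */
        return sb->ctx->rsize / page_size;
}

int main(void)
{
        struct smb3_fs_context ctx = { .rsize = 4 * 1024 * 1024, .wsize = 65536 };
        struct cifs_sb_info sb = { .ctx = &ctx };

        return readahead_pages(&sb, 4096) == 1024 ? 0 : 1;
}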
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
new file mode 100644 (file)
index 0000000..d35f599
--- /dev/null
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Witness Service client for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#include <linux/kref.h>
+#include <net/genetlink.h>
+#include <uapi/linux/cifs/cifs_netlink.h>
+
+#include "cifs_swn.h"
+#include "cifsglob.h"
+#include "cifsproto.h"
+#include "fscache.h"
+#include "cifs_debug.h"
+#include "netlink.h"
+
+static DEFINE_IDR(cifs_swnreg_idr);
+static DEFINE_MUTEX(cifs_swnreg_idr_mutex);
+
+struct cifs_swn_reg {
+       int id;
+       struct kref ref_count;
+
+       const char *net_name;
+       const char *share_name;
+       bool net_name_notify;
+       bool share_name_notify;
+       bool ip_notify;
+
+       struct cifs_tcon *tcon;
+};
+
+static int cifs_swn_auth_info_krb(struct cifs_tcon *tcon, struct sk_buff *skb)
+{
+       int ret;
+
+       ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_KRB_AUTH);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+static int cifs_swn_auth_info_ntlm(struct cifs_tcon *tcon, struct sk_buff *skb)
+{
+       int ret;
+
+       if (tcon->ses->user_name != NULL) {
+               ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_USER_NAME, tcon->ses->user_name);
+               if (ret < 0)
+                       return ret;
+       }
+
+       if (tcon->ses->password != NULL) {
+               ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_PASSWORD, tcon->ses->password);
+               if (ret < 0)
+                       return ret;
+       }
+
+       if (tcon->ses->domainName != NULL) {
+               ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_DOMAIN_NAME, tcon->ses->domainName);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/*
+ * Sends a register message to the userspace daemon based on the registration.
+ * The authentication information to connect to the witness service is bundled
+ * into the message.
+ */
+static int cifs_swn_send_register_message(struct cifs_swn_reg *swnreg)
+{
+       struct sk_buff *skb;
+       struct genlmsghdr *hdr;
+       enum securityEnum authtype;
+       struct sockaddr_storage *addr;
+       int ret;
+
+       skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (skb == NULL) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+
+       hdr = genlmsg_put(skb, 0, 0, &cifs_genl_family, 0, CIFS_GENL_CMD_SWN_REGISTER);
+       if (hdr == NULL) {
+               ret = -ENOMEM;
+               goto nlmsg_fail;
+       }
+
+       ret = nla_put_u32(skb, CIFS_GENL_ATTR_SWN_REGISTRATION_ID, swnreg->id);
+       if (ret < 0)
+               goto nlmsg_fail;
+
+       ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_NET_NAME, swnreg->net_name);
+       if (ret < 0)
+               goto nlmsg_fail;
+
+       ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME, swnreg->share_name);
+       if (ret < 0)
+               goto nlmsg_fail;
+
+       /*
+        * If there is an address stored, use it instead of the server address, because we are
+        * in the process of reconnecting to it after a share has been moved or we have been
+        * told to switch to it (client move message). In these cases we unregister from the
+        * server address and register to the new address when we receive the notification.
+        */
+       if (swnreg->tcon->ses->server->use_swn_dstaddr)
+               addr = &swnreg->tcon->ses->server->swn_dstaddr;
+       else
+               addr = &swnreg->tcon->ses->server->dstaddr;
+
+       ret = nla_put(skb, CIFS_GENL_ATTR_SWN_IP, sizeof(struct sockaddr_storage), addr);
+       if (ret < 0)
+               goto nlmsg_fail;
+
+       if (swnreg->net_name_notify) {
+               ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY);
+               if (ret < 0)
+                       goto nlmsg_fail;
+       }
+
+       if (swnreg->share_name_notify) {
+               ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY);
+               if (ret < 0)
+                       goto nlmsg_fail;
+       }
+
+       if (swnreg->ip_notify) {
+               ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_IP_NOTIFY);
+               if (ret < 0)
+                       goto nlmsg_fail;
+       }
+
+       authtype = cifs_select_sectype(swnreg->tcon->ses->server, swnreg->tcon->ses->sectype);
+       switch (authtype) {
+       case Kerberos:
+               ret = cifs_swn_auth_info_krb(swnreg->tcon, skb);
+               if (ret < 0) {
+                       cifs_dbg(VFS, "%s: Failed to get kerberos auth info: %d\n", __func__, ret);
+                       goto nlmsg_fail;
+               }
+               break;
+       case LANMAN:
+       case NTLM:
+       case NTLMv2:
+       case RawNTLMSSP:
+               ret = cifs_swn_auth_info_ntlm(swnreg->tcon, skb);
+               if (ret < 0) {
+                       cifs_dbg(VFS, "%s: Failed to get NTLM auth info: %d\n", __func__, ret);
+                       goto nlmsg_fail;
+               }
+               break;
+       default:
+               cifs_dbg(VFS, "%s: secType %d not supported!\n", __func__, authtype);
+               ret = -EINVAL;
+               goto nlmsg_fail;
+       }
+
+       genlmsg_end(skb, hdr);
+       genlmsg_multicast(&cifs_genl_family, skb, 0, CIFS_GENL_MCGRP_SWN, GFP_ATOMIC);
+
+       cifs_dbg(FYI, "%s: Message to register for network name %s with id %d sent\n", __func__,
+                       swnreg->net_name, swnreg->id);
+
+       return 0;
+
+nlmsg_fail:
+       genlmsg_cancel(skb, hdr);
+       nlmsg_free(skb);
+fail:
+       return ret;
+}
+
+/*
+ * Sends an unregister message to the userspace daemon based on the registration
+ */
+static int cifs_swn_send_unregister_message(struct cifs_swn_reg *swnreg)
+{
+       struct sk_buff *skb;
+       struct genlmsghdr *hdr;
+       int ret;
+
+       skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (skb == NULL)
+               return -ENOMEM;
+
+       hdr = genlmsg_put(skb, 0, 0, &cifs_genl_family, 0, CIFS_GENL_CMD_SWN_UNREGISTER);
+       if (hdr == NULL) {
+               ret = -ENOMEM;
+               goto nlmsg_fail;
+       }
+
+       ret = nla_put_u32(skb, CIFS_GENL_ATTR_SWN_REGISTRATION_ID, swnreg->id);
+       if (ret < 0)
+               goto nlmsg_fail;
+
+       ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_NET_NAME, swnreg->net_name);
+       if (ret < 0)
+               goto nlmsg_fail;
+
+       ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME, swnreg->share_name);
+       if (ret < 0)
+               goto nlmsg_fail;
+
+       ret = nla_put(skb, CIFS_GENL_ATTR_SWN_IP, sizeof(struct sockaddr_storage),
+                       &swnreg->tcon->ses->server->dstaddr);
+       if (ret < 0)
+               goto nlmsg_fail;
+
+       if (swnreg->net_name_notify) {
+               ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY);
+               if (ret < 0)
+                       goto nlmsg_fail;
+       }
+
+       if (swnreg->share_name_notify) {
+               ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY);
+               if (ret < 0)
+                       goto nlmsg_fail;
+       }
+
+       if (swnreg->ip_notify) {
+               ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_IP_NOTIFY);
+               if (ret < 0)
+                       goto nlmsg_fail;
+       }
+
+       genlmsg_end(skb, hdr);
+       genlmsg_multicast(&cifs_genl_family, skb, 0, CIFS_GENL_MCGRP_SWN, GFP_ATOMIC);
+
+       cifs_dbg(FYI, "%s: Message to unregister for network name %s with id %d sent\n", __func__,
+                       swnreg->net_name, swnreg->id);
+
+       return 0;
+
+nlmsg_fail:
+       genlmsg_cancel(skb, hdr);
+       nlmsg_free(skb);
+       return ret;
+}
+
+/*
+ * Try to find a matching registration for the tcon's server name and share name.
+ * Calls to this function must be protected by cifs_swnreg_idr_mutex.
+ * TODO Try to avoid memory allocations
+ */
+static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon)
+{
+       struct cifs_swn_reg *swnreg;
+       int id;
+       const char *share_name;
+       const char *net_name;
+
+       net_name = extract_hostname(tcon->treeName);
+       if (IS_ERR(net_name)) {
+               int ret;
+
+               ret = PTR_ERR(net_name);
+               cifs_dbg(VFS, "%s: failed to extract host name from target '%s': %d\n",
+                               __func__, tcon->treeName, ret);
+               return ERR_PTR(-EINVAL);
+       }
+
+       share_name = extract_sharename(tcon->treeName);
+       if (IS_ERR(share_name)) {
+               int ret;
+
+               ret = PTR_ERR(share_name);
+               cifs_dbg(VFS, "%s: failed to extract share name from target '%s': %d\n",
+                               __func__, tcon->treeName, ret);
+               kfree(net_name);
+               return ERR_PTR(-EINVAL);
+       }
+
+       idr_for_each_entry(&cifs_swnreg_idr, swnreg, id) {
+               if (strcasecmp(swnreg->net_name, net_name) != 0
+                   || strcasecmp(swnreg->share_name, share_name) != 0) {
+                       continue;
+               }
+
+               cifs_dbg(FYI, "Existing swn registration for %s:%s found\n", swnreg->net_name,
+                               swnreg->share_name);
+
+               kfree(net_name);
+               kfree(share_name);
+
+               return swnreg;
+       }
+
+       kfree(net_name);
+       kfree(share_name);
+
+       return ERR_PTR(-EEXIST);
+}
+
+/*
+ * Get a registration for the tcon's server and share name, allocating a new one if it does not
+ * exist
+ */
+static struct cifs_swn_reg *cifs_get_swn_reg(struct cifs_tcon *tcon)
+{
+       struct cifs_swn_reg *reg = NULL;
+       int ret;
+
+       mutex_lock(&cifs_swnreg_idr_mutex);
+
+       /* Check if we are already registered for this network and share names */
+       reg = cifs_find_swn_reg(tcon);
+       if (!IS_ERR(reg)) {
+               kref_get(&reg->ref_count);
+               mutex_unlock(&cifs_swnreg_idr_mutex);
+               return reg;
+       } else if (PTR_ERR(reg) != -EEXIST) {
+               mutex_unlock(&cifs_swnreg_idr_mutex);
+               return reg;
+       }
+
+       reg = kmalloc(sizeof(struct cifs_swn_reg), GFP_ATOMIC);
+       if (reg == NULL) {
+               mutex_unlock(&cifs_swnreg_idr_mutex);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       kref_init(&reg->ref_count);
+
+       reg->id = idr_alloc(&cifs_swnreg_idr, reg, 1, 0, GFP_ATOMIC);
+       if (reg->id < 0) {
+               cifs_dbg(FYI, "%s: failed to allocate registration id\n", __func__);
+               ret = reg->id;
+               goto fail;
+       }
+
+       reg->net_name = extract_hostname(tcon->treeName);
+       if (IS_ERR(reg->net_name)) {
+               ret = PTR_ERR(reg->net_name);
+               cifs_dbg(VFS, "%s: failed to extract host name from target: %d\n", __func__, ret);
+               goto fail_idr;
+       }
+
+       reg->share_name = extract_sharename(tcon->treeName);
+       if (IS_ERR(reg->share_name)) {
+               ret = PTR_ERR(reg->share_name);
+               cifs_dbg(VFS, "%s: failed to extract share name from target: %d\n", __func__, ret);
+               goto fail_net_name;
+       }
+
+       reg->net_name_notify = true;
+       reg->share_name_notify = true;
+       reg->ip_notify = (tcon->capabilities & SMB2_SHARE_CAP_SCALEOUT);
+
+       reg->tcon = tcon;
+
+       mutex_unlock(&cifs_swnreg_idr_mutex);
+
+       return reg;
+
+fail_net_name:
+       kfree(reg->net_name);
+fail_idr:
+       idr_remove(&cifs_swnreg_idr, reg->id);
+fail:
+       kfree(reg);
+       mutex_unlock(&cifs_swnreg_idr_mutex);
+       return ERR_PTR(ret);
+}
+
+static void cifs_swn_reg_release(struct kref *ref)
+{
+       struct cifs_swn_reg *swnreg = container_of(ref, struct cifs_swn_reg, ref_count);
+       int ret;
+
+       ret = cifs_swn_send_unregister_message(swnreg);
+       if (ret < 0)
+               cifs_dbg(VFS, "%s: Failed to send unregister message: %d\n", __func__, ret);
+
+       idr_remove(&cifs_swnreg_idr, swnreg->id);
+       kfree(swnreg->net_name);
+       kfree(swnreg->share_name);
+       kfree(swnreg);
+}
+
+static void cifs_put_swn_reg(struct cifs_swn_reg *swnreg)
+{
+       mutex_lock(&cifs_swnreg_idr_mutex);
+       kref_put(&swnreg->ref_count, cifs_swn_reg_release);
+       mutex_unlock(&cifs_swnreg_idr_mutex);
+}
+
+static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const char *name, int state)
+{
+       int i;
+
+       switch (state) {
+       case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE:
+               cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name);
+               for (i = 0; i < swnreg->tcon->ses->chan_count; i++) {
+                       spin_lock(&GlobalMid_Lock);
+                       if (swnreg->tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+                               swnreg->tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+                       spin_unlock(&GlobalMid_Lock);
+               }
+               break;
+       case CIFS_SWN_RESOURCE_STATE_AVAILABLE:
+               cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name);
+               for (i = 0; i < swnreg->tcon->ses->chan_count; i++) {
+                       spin_lock(&GlobalMid_Lock);
+                       if (swnreg->tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+                               swnreg->tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+                       spin_unlock(&GlobalMid_Lock);
+               }
+               break;
+       case CIFS_SWN_RESOURCE_STATE_UNKNOWN:
+               cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name);
+               break;
+       }
+       return 0;
+}
+
+static bool cifs_sockaddr_equal(struct sockaddr_storage *addr1, struct sockaddr_storage *addr2)
+{
+       if (addr1->ss_family != addr2->ss_family)
+               return false;
+
+       if (addr1->ss_family == AF_INET) {
+               return (memcmp(&((const struct sockaddr_in *)addr1)->sin_addr,
+                               &((const struct sockaddr_in *)addr2)->sin_addr,
+                               sizeof(struct in_addr)) == 0);
+       }
+
+       if (addr1->ss_family == AF_INET6) {
+               return (memcmp(&((const struct sockaddr_in6 *)addr1)->sin6_addr,
+                               &((const struct sockaddr_in6 *)addr2)->sin6_addr,
+                               sizeof(struct in6_addr)) == 0);
+       }
+
+       return false;
+}
+
+static int cifs_swn_store_swn_addr(const struct sockaddr_storage *new,
+                                  const struct sockaddr_storage *old,
+                                  struct sockaddr_storage *dst)
+{
+       __be16 port = 0;
+
+       if (old->ss_family == AF_INET) {
+               struct sockaddr_in *ipv4 = (struct sockaddr_in *)old;
+
+               port = ipv4->sin_port;
+       }
+
+       if (old->ss_family == AF_INET6) {
+               struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)old;
+
+               port = ipv6->sin6_port;
+       }
+
+       if (new->ss_family == AF_INET) {
+               struct sockaddr_in *ipv4 = (struct sockaddr_in *)new;
+
+               ipv4->sin_port = port;
+       }
+
+       if (new->ss_family == AF_INET6) {
+               struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)new;
+
+               ipv6->sin6_port = port;
+       }
+
+       *dst = *new;
+
+       return 0;
+}
+
+static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *addr)
+{
+       int ret = 0;
+
+       /* Store the reconnect address */
+       mutex_lock(&tcon->ses->server->srv_mutex);
+       if (cifs_sockaddr_equal(&tcon->ses->server->dstaddr, addr))
+               goto unlock;
+
+       ret = cifs_swn_store_swn_addr(addr, &tcon->ses->server->dstaddr,
+                                     &tcon->ses->server->swn_dstaddr);
+       if (ret < 0) {
+               cifs_dbg(VFS, "%s: failed to store address: %d\n", __func__, ret);
+               goto unlock;
+       }
+       tcon->ses->server->use_swn_dstaddr = true;
+
+       /*
+        * Unregister to stop receiving notifications for the old IP address.
+        */
+       ret = cifs_swn_unregister(tcon);
+       if (ret < 0) {
+               cifs_dbg(VFS, "%s: Failed to unregister for witness notifications: %d\n",
+                        __func__, ret);
+               goto unlock;
+       }
+
+       /*
+        * And register to receive notifications for the new IP address now that we have
+        * stored the new address.
+        */
+       ret = cifs_swn_register(tcon);
+       if (ret < 0) {
+               cifs_dbg(VFS, "%s: Failed to register for witness notifications: %d\n",
+                        __func__, ret);
+               goto unlock;
+       }
+
+       spin_lock(&GlobalMid_Lock);
+       if (tcon->ses->server->tcpStatus != CifsExiting)
+               tcon->ses->server->tcpStatus = CifsNeedReconnect;
+       spin_unlock(&GlobalMid_Lock);
+
+unlock:
+       mutex_unlock(&tcon->ses->server->srv_mutex);
+
+       return ret;
+}
+
+static int cifs_swn_client_move(struct cifs_swn_reg *swnreg, struct sockaddr_storage *addr)
+{
+       struct sockaddr_in *ipv4 = (struct sockaddr_in *)addr;
+       struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)addr;
+
+       if (addr->ss_family == AF_INET)
+               cifs_dbg(FYI, "%s: move to %pI4\n", __func__, &ipv4->sin_addr);
+       else if (addr->ss_family == AF_INET6)
+               cifs_dbg(FYI, "%s: move to %pI6\n", __func__, &ipv6->sin6_addr);
+
+       return cifs_swn_reconnect(swnreg->tcon, addr);
+}
+
+int cifs_swn_notify(struct sk_buff *skb, struct genl_info *info)
+{
+       struct cifs_swn_reg *swnreg;
+       char name[256];
+       int type;
+
+       if (info->attrs[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]) {
+               int swnreg_id;
+
+               swnreg_id = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]);
+               mutex_lock(&cifs_swnreg_idr_mutex);
+               swnreg = idr_find(&cifs_swnreg_idr, swnreg_id);
+               mutex_unlock(&cifs_swnreg_idr_mutex);
+               if (swnreg == NULL) {
+                       cifs_dbg(FYI, "%s: registration id %d not found\n", __func__, swnreg_id);
+                       return -EINVAL;
+               }
+       } else {
+               cifs_dbg(FYI, "%s: missing registration id attribute\n", __func__);
+               return -EINVAL;
+       }
+
+       if (info->attrs[CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]) {
+               type = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]);
+       } else {
+               cifs_dbg(FYI, "%s: missing notification type attribute\n", __func__);
+               return -EINVAL;
+       }
+
+       switch (type) {
+       case CIFS_SWN_NOTIFICATION_RESOURCE_CHANGE: {
+               int state;
+
+               if (info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_NAME]) {
+                       nla_strscpy(name, info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_NAME],
+                                       sizeof(name));
+               } else {
+                       cifs_dbg(FYI, "%s: missing resource name attribute\n", __func__);
+                       return -EINVAL;
+               }
+               if (info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_STATE]) {
+                       state = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_STATE]);
+               } else {
+                       cifs_dbg(FYI, "%s: missing resource state attribute\n", __func__);
+                       return -EINVAL;
+               }
+               return cifs_swn_resource_state_changed(swnreg, name, state);
+       }
+       case CIFS_SWN_NOTIFICATION_CLIENT_MOVE: {
+               struct sockaddr_storage addr;
+
+               if (info->attrs[CIFS_GENL_ATTR_SWN_IP]) {
+                       nla_memcpy(&addr, info->attrs[CIFS_GENL_ATTR_SWN_IP], sizeof(addr));
+               } else {
+                       cifs_dbg(FYI, "%s: missing IP address attribute\n", __func__);
+                       return -EINVAL;
+               }
+               return cifs_swn_client_move(swnreg, &addr);
+       }
+       default:
+               cifs_dbg(FYI, "%s: unknown notification type %d\n", __func__, type);
+               break;
+       }
+
+       return 0;
+}
+
+int cifs_swn_register(struct cifs_tcon *tcon)
+{
+       struct cifs_swn_reg *swnreg;
+       int ret;
+
+       swnreg = cifs_get_swn_reg(tcon);
+       if (IS_ERR(swnreg))
+               return PTR_ERR(swnreg);
+
+       ret = cifs_swn_send_register_message(swnreg);
+       if (ret < 0) {
+               cifs_dbg(VFS, "%s: Failed to send swn register message: %d\n", __func__, ret);
+               /* Do not put the swnreg or return an error; the echo task will retry */
+       }
+
+       return 0;
+}
+
+int cifs_swn_unregister(struct cifs_tcon *tcon)
+{
+       struct cifs_swn_reg *swnreg;
+
+       mutex_lock(&cifs_swnreg_idr_mutex);
+
+       swnreg = cifs_find_swn_reg(tcon);
+       if (IS_ERR(swnreg)) {
+               mutex_unlock(&cifs_swnreg_idr_mutex);
+               return PTR_ERR(swnreg);
+       }
+
+       mutex_unlock(&cifs_swnreg_idr_mutex);
+
+       cifs_put_swn_reg(swnreg);
+
+       return 0;
+}
+
+void cifs_swn_dump(struct seq_file *m)
+{
+       struct cifs_swn_reg *swnreg;
+       struct sockaddr_in *sa;
+       struct sockaddr_in6 *sa6;
+       int id;
+
+       seq_puts(m, "Witness registrations:");
+
+       mutex_lock(&cifs_swnreg_idr_mutex);
+       idr_for_each_entry(&cifs_swnreg_idr, swnreg, id) {
+               seq_printf(m, "\nId: %u Refs: %u Network name: '%s'%s Share name: '%s'%s Ip address: ",
+                               id, kref_read(&swnreg->ref_count),
+                               swnreg->net_name, swnreg->net_name_notify ? "(y)" : "(n)",
+                               swnreg->share_name, swnreg->share_name_notify ? "(y)" : "(n)");
+               switch (swnreg->tcon->ses->server->dstaddr.ss_family) {
+               case AF_INET:
+                       sa = (struct sockaddr_in *) &swnreg->tcon->ses->server->dstaddr;
+                       seq_printf(m, "%pI4", &sa->sin_addr.s_addr);
+                       break;
+               case AF_INET6:
+                       sa6 = (struct sockaddr_in6 *) &swnreg->tcon->ses->server->dstaddr;
+                       seq_printf(m, "%pI6", &sa6->sin6_addr.s6_addr);
+                       if (sa6->sin6_scope_id)
+                               seq_printf(m, "%%%u", sa6->sin6_scope_id);
+                       break;
+               default:
+                       seq_puts(m, "(unknown)");
+               }
+               seq_printf(m, "%s", swnreg->ip_notify ? "(y)" : "(n)");
+       }
+       mutex_unlock(&cifs_swnreg_idr_mutex);
+       seq_puts(m, "\n");
+}
+
+void cifs_swn_check(void)
+{
+       struct cifs_swn_reg *swnreg;
+       int id;
+       int ret;
+
+       mutex_lock(&cifs_swnreg_idr_mutex);
+       idr_for_each_entry(&cifs_swnreg_idr, swnreg, id) {
+               ret = cifs_swn_send_register_message(swnreg);
+               if (ret < 0)
+                       cifs_dbg(FYI, "%s: Failed to send register message: %d\n", __func__, ret);
+       }
+       mutex_unlock(&cifs_swnreg_idr_mutex);
+}
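One subtlety in the client-move path above: cifs_swn_store_swn_addr() takes the IP from the address the witness service announced but keeps the port from the address we were already connected to, and only then does cifs_swn_reconnect() flip tcpStatus to CifsNeedReconnect. A standalone userspace illustration of that copy (192.0.2.x are documentation addresses; the kernel helper works on sockaddr_storage and handles IPv6 the same way):

#include <arpa/inet.h>
#include <stdio.h>

int main(void)
{
        struct sockaddr_in old = { .sin_family = AF_INET,
                                   .sin_port   = htons(445) };
        struct sockaddr_in new = { .sin_family = AF_INET };
        struct sockaddr_in dst;

        inet_pton(AF_INET, "192.0.2.1", &old.sin_addr);   /* current server */
        inet_pton(AF_INET, "192.0.2.2", &new.sin_addr);   /* move target   */

        new.sin_port = old.sin_port;          /* keep the established port */
        dst = new;

        printf("reconnect to %s:%u\n",
               inet_ntoa(dst.sin_addr), (unsigned)ntohs(dst.sin_port));
        return 0;
}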
diff --git a/fs/cifs/cifs_swn.h b/fs/cifs/cifs_swn.h
new file mode 100644 (file)
index 0000000..236ecd4
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Witness Service client for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#ifndef _CIFS_SWN_H
+#define _CIFS_SWN_H
+
+struct cifs_tcon;
+struct sk_buff;
+struct genl_info;
+
+extern int cifs_swn_register(struct cifs_tcon *tcon);
+
+extern int cifs_swn_unregister(struct cifs_tcon *tcon);
+
+extern int cifs_swn_notify(struct sk_buff *skb, struct genl_info *info);
+
+extern void cifs_swn_dump(struct seq_file *m);
+
+extern void cifs_swn_check(void);
+
+#endif /* _CIFS_SWN_H */
index ef4784e..562913e 100644 (file)
@@ -32,6 +32,7 @@
 #include "cifsacl.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
+#include "fs_context.h"
 
 /* security id for everyone/world system group */
 static const struct cifs_sid sid_everyone = {
@@ -346,8 +347,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
        struct key *sidkey;
        char *sidstr;
        const struct cred *saved_cred;
-       kuid_t fuid = cifs_sb->mnt_uid;
-       kgid_t fgid = cifs_sb->mnt_gid;
+       kuid_t fuid = cifs_sb->ctx->linux_uid;
+       kgid_t fgid = cifs_sb->ctx->linux_gid;
 
        /*
         * If we have too many subauthorities, then something is really wrong.
@@ -448,7 +449,7 @@ out_revert_creds:
 
        /*
         * Note that we return 0 here unconditionally. If the mapping
-        * fails then we just fall back to using the mnt_uid/mnt_gid.
+        * fails then we just fall back to using the ctx->linux_uid/linux_gid.
         */
 got_valid_id:
        rc = 0;
@@ -557,30 +558,37 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
    bits to set can be: S_IRWXU, S_IRWXG or S_IRWXO ie 00700 or 00070 or 00007
 */
 static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
-                                umode_t *pbits_to_set)
+                                umode_t *pdenied, umode_t mask)
 {
        __u32 flags = le32_to_cpu(ace_flags);
-       /* the order of ACEs is important.  The canonical order is to begin with
-          DENY entries followed by ALLOW, otherwise an allow entry could be
-          encountered first, making the subsequent deny entry like "dead code"
-          which would be superflous since Windows stops when a match is made
-          for the operation you are trying to perform for your user */
-
-       /* For deny ACEs we change the mask so that subsequent allow access
-          control entries do not turn on the bits we are denying */
+       /*
+        * Do not assume "preferred" or "canonical" order.
+        * The first DENY or ALLOW ACE which matches perfectly is
+        * the permission to be used. Once allowed or denied, the same
+        * permission in later ACEs does not matter.
+        */
+
+       /* If not already allowed, deny these bits */
        if (type == ACCESS_DENIED) {
-               if (flags & GENERIC_ALL)
-                       *pbits_to_set &= ~S_IRWXUGO;
-
-               if ((flags & GENERIC_WRITE) ||
-                       ((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS))
-                       *pbits_to_set &= ~S_IWUGO;
-               if ((flags & GENERIC_READ) ||
-                       ((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS))
-                       *pbits_to_set &= ~S_IRUGO;
-               if ((flags & GENERIC_EXECUTE) ||
-                       ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
-                       *pbits_to_set &= ~S_IXUGO;
+               if (flags & GENERIC_ALL &&
+                               !(*pmode & mask & 0777))
+                       *pdenied |= mask & 0777;
+
+               if (((flags & GENERIC_WRITE) ||
+                               ((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS)) &&
+                               !(*pmode & mask & 0222))
+                       *pdenied |= mask & 0222;
+
+               if (((flags & GENERIC_READ) ||
+                               ((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS)) &&
+                               !(*pmode & mask & 0444))
+                       *pdenied |= mask & 0444;
+
+               if (((flags & GENERIC_EXECUTE) ||
+                               ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) &&
+                               !(*pmode & mask & 0111))
+                       *pdenied |= mask & 0111;
+
                return;
        } else if (type != ACCESS_ALLOWED) {
                cifs_dbg(VFS, "unknown access control type %d\n", type);
@@ -588,20 +596,38 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
        }
        /* else ACCESS_ALLOWED type */
 
-       if (flags & GENERIC_ALL) {
-               *pmode |= (S_IRWXUGO & (*pbits_to_set));
+       if ((flags & GENERIC_ALL) &&
+                       !(*pdenied & mask & 0777)) {
+               *pmode |= mask & 0777;
                cifs_dbg(NOISY, "all perms\n");
                return;
        }
-       if ((flags & GENERIC_WRITE) ||
-                       ((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS))
-               *pmode |= (S_IWUGO & (*pbits_to_set));
-       if ((flags & GENERIC_READ) ||
-                       ((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS))
-               *pmode |= (S_IRUGO & (*pbits_to_set));
-       if ((flags & GENERIC_EXECUTE) ||
-                       ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
-               *pmode |= (S_IXUGO & (*pbits_to_set));
+
+       if (((flags & GENERIC_WRITE) ||
+                       ((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS)) &&
+                       !(*pdenied & mask & 0222))
+               *pmode |= mask & 0222;
+
+       if (((flags & GENERIC_READ) ||
+                       ((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS)) &&
+                       !(*pdenied & mask & 0444))
+               *pmode |= mask & 0444;
+
+       if (((flags & GENERIC_EXECUTE) ||
+                       ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) &&
+                       !(*pdenied & mask & 0111))
+               *pmode |= mask & 0111;
+
+       /* If DELETE_CHILD is set only on an owner ACE, set sticky bit */
+       if (flags & FILE_DELETE_CHILD) {
+               if (mask == ACL_OWNER_MASK) {
+                       if (!(*pdenied & 01000))
+                               *pmode |= 01000;
+               } else if (!(*pdenied & 01000)) {
+                       *pmode &= ~01000;
+                       *pdenied |= 01000;
+               }
+       }
 
        cifs_dbg(NOISY, "access flags 0x%x mode now %04o\n", flags, *pmode);
        return;
@@ -638,17 +664,26 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
 }
 
 static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
-                       const struct cifs_sid *psid, __u64 nmode, umode_t bits)
+                       const struct cifs_sid *psid, __u64 nmode,
+                       umode_t bits, __u8 access_type,
+                       bool allow_delete_child)
 {
        int i;
        __u16 size = 0;
        __u32 access_req = 0;
 
-       pntace->type = ACCESS_ALLOWED;
+       pntace->type = access_type;
        pntace->flags = 0x0;
        mode_to_access_flags(nmode, bits, &access_req);
-       if (!access_req)
+
+       if (access_type == ACCESS_ALLOWED && allow_delete_child)
+               access_req |= FILE_DELETE_CHILD;
+
+       if (access_type == ACCESS_ALLOWED && !access_req)
                access_req = SET_MINIMUM_RIGHTS;
+       else if (access_type == ACCESS_DENIED)
+               access_req &= ~SET_MINIMUM_RIGHTS;
+
        pntace->access_req = cpu_to_le32(access_req);
 
        pntace->sid.revision = psid->revision;
@@ -716,7 +751,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
        if (!pdacl) {
                /* no DACL in the security descriptor, set
                   all the permissions for user/group/other */
-               fattr->cf_mode |= S_IRWXUGO;
+               fattr->cf_mode |= 0777;
                return;
        }
 
@@ -733,16 +768,14 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
        /* reset rwx permissions for user/group/other.
           Also, if num_aces is 0 i.e. DACL has no ACEs,
           user/group/other have no permissions */
-       fattr->cf_mode &= ~(S_IRWXUGO);
+       fattr->cf_mode &= ~(0777);
 
        acl_base = (char *)pdacl;
        acl_size = sizeof(struct cifs_acl);
 
        num_aces = le32_to_cpu(pdacl->num_aces);
        if (num_aces > 0) {
-               umode_t user_mask = S_IRWXU;
-               umode_t group_mask = S_IRWXG;
-               umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO;
+               umode_t denied_mode = 0;
 
                if (num_aces > ULONG_MAX / sizeof(struct cifs_ace *))
                        return;
@@ -768,26 +801,28 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
                                fattr->cf_mode |=
                                        le32_to_cpu(ppace[i]->sid.sub_auth[2]);
                                break;
-                       } else if (compare_sids(&(ppace[i]->sid), pownersid) == 0)
-                               access_flags_to_mode(ppace[i]->access_req,
-                                                    ppace[i]->type,
-                                                    &fattr->cf_mode,
-                                                    &user_mask);
-                       else if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0)
-                               access_flags_to_mode(ppace[i]->access_req,
-                                                    ppace[i]->type,
-                                                    &fattr->cf_mode,
-                                                    &group_mask);
-                       else if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0)
-                               access_flags_to_mode(ppace[i]->access_req,
-                                                    ppace[i]->type,
-                                                    &fattr->cf_mode,
-                                                    &other_mask);
-                       else if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)
-                               access_flags_to_mode(ppace[i]->access_req,
-                                                    ppace[i]->type,
-                                                    &fattr->cf_mode,
-                                                    &other_mask);
+                       } else {
+                               if (compare_sids(&(ppace[i]->sid), pownersid) == 0) {
+                                       access_flags_to_mode(ppace[i]->access_req,
+                                                       ppace[i]->type,
+                                                       &fattr->cf_mode,
+                                                       &denied_mode,
+                                                       ACL_OWNER_MASK);
+                               } else if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0) {
+                                       access_flags_to_mode(ppace[i]->access_req,
+                                                       ppace[i]->type,
+                                                       &fattr->cf_mode,
+                                                       &denied_mode,
+                                                       ACL_GROUP_MASK);
+                               } else if ((compare_sids(&(ppace[i]->sid), &sid_everyone) == 0) ||
+                                               (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)) {
+                                       access_flags_to_mode(ppace[i]->access_req,
+                                                       ppace[i]->type,
+                                                       &fattr->cf_mode,
+                                                       &denied_mode,
+                                                       ACL_EVERYONE_MASK);
+                               }
+                       }
 
 
 /*                     memcpy((void *)(&(cifscred->aces[i])),
@@ -873,32 +908,91 @@ unsigned int setup_special_user_owner_ACE(struct cifs_ace *pntace)
 }
 
 static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
-                       struct cifs_sid *pgrpsid, __u64 nmode, bool modefromsid)
+                       struct cifs_sid *pgrpsid, __u64 *pnmode, bool modefromsid)
 {
        u16 size = 0;
        u32 num_aces = 0;
        struct cifs_acl *pnndacl;
+       __u64 nmode;
+       __u64 user_mode;
+       __u64 group_mode;
+       __u64 other_mode;
+       __u64 deny_user_mode = 0;
+       __u64 deny_group_mode = 0;
+       bool sticky_set = false;
 
        pnndacl = (struct cifs_acl *)((char *)pndacl + sizeof(struct cifs_acl));
 
+       nmode = *pnmode;
+
        if (modefromsid) {
                struct cifs_ace *pntace =
                        (struct cifs_ace *)((char *)pnndacl + size);
 
                size += setup_special_mode_ACE(pntace, nmode);
                num_aces++;
+               goto set_size;
        }
 
+       /*
+        * We'll try to keep the mode as requested by the user.
+        * But in cases where we cannot meaningfully convert that
+        * into an ACL, return the updated mode, so that it is
+        * updated in the inode.
+        */
+
+       if (!memcmp(pownersid, pgrpsid, sizeof(struct cifs_sid))) {
+               /*
+                * Case when owner and group SIDs are the same.
+                * Set the more restrictive of the two modes.
+                */
+               user_mode = nmode & (nmode << 3) & 0700;
+               group_mode = nmode & (nmode >> 3) & 0070;
+       } else {
+               user_mode = nmode & 0700;
+               group_mode = nmode & 0070;
+       }
+
+       other_mode = nmode & 0007;
+
+       /* We need a DENY ACE when this class's perms are more restrictive than the classes that follow. */
+       deny_user_mode = ~(user_mode) & ((group_mode << 3) | (other_mode << 6)) & 0700;
+       deny_group_mode = ~(group_mode) & (other_mode << 3) & 0070;
+
+       *pnmode = user_mode | group_mode | other_mode | (nmode & ~0777);
+
+       /* This tells if we should allow delete child for group and everyone. */
+       if (nmode & 01000)
+               sticky_set = true;
+
+       if (deny_user_mode) {
+               size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
+                               pownersid, deny_user_mode, 0700, ACCESS_DENIED, false);
+               num_aces++;
+       }
+       /* Group DENY ACE does not conflict with owner ALLOW ACE. Keep in preferred order */
+       if (deny_group_mode && !(deny_group_mode & (user_mode >> 3))) {
+               size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
+                               pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+               num_aces++;
+       }
        size += fill_ace_for_sid((struct cifs_ace *) ((char *)pnndacl + size),
-                                       pownersid, nmode, S_IRWXU);
+                       pownersid, user_mode, 0700, ACCESS_ALLOWED, true);
        num_aces++;
+       /* Group DENY ACE conflicts with owner ALLOW ACE. So keep it after. */
+       if (deny_group_mode && (deny_group_mode & (user_mode >> 3))) {
+               size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
+                               pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+               num_aces++;
+       }
        size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-                                       pgrpsid, nmode, S_IRWXG);
+                       pgrpsid, group_mode, 0070, ACCESS_ALLOWED, !sticky_set);
        num_aces++;
        size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-                                        &sid_everyone, nmode, S_IRWXO);
+                       &sid_everyone, other_mode, 0007, ACCESS_ALLOWED, !sticky_set);
        num_aces++;
 
+set_size:
        pndacl->num_aces = cpu_to_le32(num_aces);
        pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
 
@@ -1000,7 +1094,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
 
 /* Convert permission bits from mode to equivalent CIFS ACL */
 static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
-       __u32 secdesclen, __u64 nmode, kuid_t uid, kgid_t gid,
+       __u32 secdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid,
        bool mode_from_sid, bool id_from_sid, int *aclflag)
 {
        int rc = 0;
@@ -1012,7 +1106,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
        struct cifs_acl *dacl_ptr = NULL;  /* no need for SACL ptr */
        struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */
 
-       if (nmode != NO_CHANGE_64) { /* chmod */
+       if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
                owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
                                le32_to_cpu(pntsd->osidoffset));
                group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
@@ -1026,7 +1120,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
                ndacl_ptr->num_aces = 0;
 
                rc = set_chmod_dacl(ndacl_ptr, owner_sid_ptr, group_sid_ptr,
-                                   nmode, mode_from_sid);
+                                   pnmode, mode_from_sid);
                sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size);
                /* copy sec desc control portion & owner and group sids */
                copy_sec_desc(pntsd, pnntsd, sidsoffset);
@@ -1101,7 +1195,8 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 }
 
 struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
-               const struct cifs_fid *cifsfid, u32 *pacllen)
+                                     const struct cifs_fid *cifsfid, u32 *pacllen,
+                                     u32 __maybe_unused unused)
 {
        struct cifs_ntsd *pntsd = NULL;
        unsigned int xid;
@@ -1169,7 +1264,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
 /* Retrieve an ACL from the server */
 struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
                                      struct inode *inode, const char *path,
-                                     u32 *pacllen)
+                              u32 *pacllen, u32 info)
 {
        struct cifs_ntsd *pntsd = NULL;
        struct cifsFileInfo *open_file = NULL;
@@ -1179,7 +1274,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
        if (!open_file)
                return get_cifs_acl_by_path(cifs_sb, path, pacllen);
 
-       pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen);
+       pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info);
        cifsFileInfo_put(open_file);
        return pntsd;
 }
@@ -1244,6 +1339,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
        int rc = 0;
        struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
        struct smb_version_operations *ops;
+       const u32 info = 0;
 
        cifs_dbg(NOISY, "converting ACL to mode for %s\n", path);
 
@@ -1253,9 +1349,9 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
        ops = tlink_tcon(tlink)->ses->server->ops;
 
        if (pfid && (ops->get_acl_by_fid))
-               pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen);
+               pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen, info);
        else if (ops->get_acl)
-               pntsd = ops->get_acl(cifs_sb, inode, path, &acllen);
+               pntsd = ops->get_acl(cifs_sb, inode, path, &acllen, info);
        else {
                cifs_put_tlink(tlink);
                return -EOPNOTSUPP;
@@ -1282,7 +1378,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
 
 /* Convert mode bits to an ACL so we can update the ACL on the server */
 int
-id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
+id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
                        kuid_t uid, kgid_t gid)
 {
        int rc = 0;
@@ -1294,6 +1390,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
        struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
        struct smb_version_operations *ops;
        bool mode_from_sid, id_from_sid;
+       const u32 info = 0;
 
        if (IS_ERR(tlink))
                return PTR_ERR(tlink);
@@ -1309,7 +1406,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
                return -EOPNOTSUPP;
        }
 
-       pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen);
+       pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen, info);
        if (IS_ERR(pntsd)) {
                rc = PTR_ERR(pntsd);
                cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
@@ -1341,7 +1438,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
        else
                id_from_sid = false;
 
-       rc = build_sec_desc(pntsd, pnntsd, secdesclen, nmode, uid, gid,
+       rc = build_sec_desc(pntsd, pnntsd, secdesclen, pnmode, uid, gid,
                            mode_from_sid, id_from_sid, &aclflag);
 
        cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
index 45665ff..ff7fd08 100644 (file)
 #define WRITE_BIT       0x2
 #define EXEC_BIT        0x1
 
+#define ACL_OWNER_MASK 0700
+#define ACL_GROUP_MASK 0770
+#define ACL_EVERYONE_MASK 0777
+
 #define UBITSHIFT      6
 #define GBITSHIFT      3
 
index 9daa256..51d53e4 100644 (file)
@@ -661,6 +661,11 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
        unsigned char *tiblob = NULL; /* target info blob */
        __le64 rsp_timestamp;
 
+       if (nls_cp == NULL) {
+               cifs_dbg(VFS, "%s called with nls_cp==NULL\n", __func__);
+               return -EINVAL;
+       }
+
        if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) {
                if (!ses->domainName) {
                        if (ses->domainAuto) {
index 472cb77..ce0d003 100644 (file)
 #ifdef CONFIG_CIFS_DFS_UPCALL
 #include "dfs_cache.h"
 #endif
+#ifdef CONFIG_CIFS_SWN_UPCALL
+#include "netlink.h"
+#endif
+#include "fs_context.h"
 
 /*
  * DOS dates from 1980/1/1 through 2107/12/31
@@ -214,7 +218,7 @@ cifs_read_super(struct super_block *sb)
        if (rc)
                goto out_no_root;
        /* tune readahead according to rsize */
-       sb->s_bdi->ra_pages = cifs_sb->rsize / PAGE_SIZE;
+       sb->s_bdi->ra_pages = cifs_sb->ctx->rsize / PAGE_SIZE;
 
        sb->s_blocksize = CIFS_MAX_MSGSIZE;
        sb->s_blocksize_bits = 14;      /* default 2**14 = CIFS_MAX_MSGSIZE */
@@ -458,16 +462,23 @@ cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb)
                seq_puts(s, "loose");
 }
 
-static void
-cifs_show_nls(struct seq_file *s, struct nls_table *cur)
+/*
+ * cifs_show_devname() is used so we show the mount device name with correct
+ * format (e.g. forward slashes vs. back slashes) in /proc/mounts
+ */
+static int cifs_show_devname(struct seq_file *m, struct dentry *root)
 {
-       struct nls_table *def;
-
-       /* Display iocharset= option if it's not default charset */
-       def = load_nls_default();
-       if (def != cur)
-               seq_printf(s, ",iocharset=%s", cur->charset);
-       unload_nls(def);
+       struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
+       char *devname = kstrdup(cifs_sb->ctx->UNC, GFP_KERNEL);
+
+       if (devname == NULL)
+               seq_puts(m, "none");
+       else {
+               convert_delimiter(devname, '/');
+               seq_puts(m, devname);
+               kfree(devname);
+       }
+       return 0;
 }
 
 /*
@@ -489,7 +500,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 
        if (tcon->no_lease)
                seq_puts(s, ",nolease");
-       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
+       if (cifs_sb->ctx->multiuser)
                seq_puts(s, ",multiuser");
        else if (tcon->ses->user_name)
                seq_show_option(s, "username", tcon->ses->user_name);
@@ -514,14 +525,14 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
        }
 
        seq_printf(s, ",uid=%u",
-                  from_kuid_munged(&init_user_ns, cifs_sb->mnt_uid));
+                  from_kuid_munged(&init_user_ns, cifs_sb->ctx->linux_uid));
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
                seq_puts(s, ",forceuid");
        else
                seq_puts(s, ",noforceuid");
 
        seq_printf(s, ",gid=%u",
-                  from_kgid_munged(&init_user_ns, cifs_sb->mnt_gid));
+                  from_kgid_munged(&init_user_ns, cifs_sb->ctx->linux_gid));
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
                seq_puts(s, ",forcegid");
        else
@@ -531,11 +542,10 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 
        if (!tcon->unix_ext)
                seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho",
-                                          cifs_sb->mnt_file_mode,
-                                          cifs_sb->mnt_dir_mode);
-
-       cifs_show_nls(s, cifs_sb->local_nls);
-
+                                          cifs_sb->ctx->file_mode,
+                                          cifs_sb->ctx->dir_mode);
+       if (cifs_sb->ctx->iocharset)
+               seq_printf(s, ",iocharset=%s", cifs_sb->ctx->iocharset);
        if (tcon->seal)
                seq_puts(s, ",seal");
        else if (tcon->ses->server->ignore_signature)
@@ -605,15 +615,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID)
                seq_printf(s, ",backupuid=%u",
                           from_kuid_munged(&init_user_ns,
-                                           cifs_sb->mnt_backupuid));
+                                           cifs_sb->ctx->backupuid));
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID)
                seq_printf(s, ",backupgid=%u",
                           from_kgid_munged(&init_user_ns,
-                                           cifs_sb->mnt_backupgid));
+                                           cifs_sb->ctx->backupgid));
 
-       seq_printf(s, ",rsize=%u", cifs_sb->rsize);
-       seq_printf(s, ",wsize=%u", cifs_sb->wsize);
-       seq_printf(s, ",bsize=%u", cifs_sb->bsize);
+       seq_printf(s, ",rsize=%u", cifs_sb->ctx->rsize);
+       seq_printf(s, ",wsize=%u", cifs_sb->ctx->wsize);
+       seq_printf(s, ",bsize=%u", cifs_sb->ctx->bsize);
        if (tcon->ses->server->min_offload)
                seq_printf(s, ",esize=%u", tcon->ses->server->min_offload);
        seq_printf(s, ",echo_interval=%lu",
@@ -628,12 +638,17 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
        if (tcon->handle_timeout)
                seq_printf(s, ",handletimeout=%u", tcon->handle_timeout);
        /* convert actimeo and display it in seconds */
-       seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
+       seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->actimeo / HZ);
 
        if (tcon->ses->chan_max > 1)
                seq_printf(s, ",multichannel,max_channels=%zu",
                           tcon->ses->chan_max);
 
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       if (tcon->use_witness)
+               seq_puts(s, ",witness");
+#endif
+
        return 0;
 }
 
@@ -681,13 +696,6 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
 }
 #endif
 
-static int cifs_remount(struct super_block *sb, int *flags, char *data)
-{
-       sync_filesystem(sb);
-       *flags |= SB_NODIRATIME;
-       return 0;
-}
-
 static int cifs_drop_inode(struct inode *inode)
 {
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -703,13 +711,14 @@ static const struct super_operations cifs_super_ops = {
        .free_inode = cifs_free_inode,
        .drop_inode     = cifs_drop_inode,
        .evict_inode    = cifs_evict_inode,
+/*     .show_path      = cifs_show_path, */ /* Would we ever need show path? */
+       .show_devname   = cifs_show_devname,
 /*     .delete_inode   = cifs_delete_inode,  */  /* Do not need above
        function unless later we add lazy close of inodes or unless the
        kernel forgets to call us with the same number of releases (closes)
        as opens */
        .show_options = cifs_show_options,
        .umount_begin   = cifs_umount_begin,
-       .remount_fs = cifs_remount,
 #ifdef CONFIG_CIFS_STATS2
        .show_stats = cifs_show_stats,
 #endif
@@ -720,7 +729,7 @@ static const struct super_operations cifs_super_ops = {
  * Return dentry with refcount + 1 on success and NULL otherwise.
  */
 static struct dentry *
-cifs_get_root(struct smb_vol *vol, struct super_block *sb)
+cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb)
 {
        struct dentry *dentry;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -731,7 +740,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
                return dget(sb->s_root);
 
-       full_path = cifs_build_path_to_root(vol, cifs_sb,
+       full_path = cifs_build_path_to_root(ctx, cifs_sb,
                                cifs_sb_master_tcon(cifs_sb), 0);
        if (full_path == NULL)
                return ERR_PTR(-ENOMEM);
@@ -777,14 +786,13 @@ static int cifs_set_super(struct super_block *sb, void *data)
        return set_anon_super(sb, NULL);
 }
 
-static struct dentry *
+struct dentry *
 cifs_smb3_do_mount(struct file_system_type *fs_type,
-             int flags, const char *dev_name, void *data, bool is_smb3)
+             int flags, struct smb3_fs_context *old_ctx)
 {
        int rc;
        struct super_block *sb;
-       struct cifs_sb_info *cifs_sb;
-       struct smb_vol *volume_info;
+       struct cifs_sb_info *cifs_sb = NULL;
        struct cifs_mnt_data mnt_data;
        struct dentry *root;
 
@@ -793,42 +801,49 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
         *      If CIFS_DEBUG && cifs_FYI
         */
        if (cifsFYI)
-               cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags);
+               cifs_dbg(FYI, "Devname: %s flags: %d\n", old_ctx->UNC, flags);
        else
-               cifs_info("Attempting to mount %s\n", dev_name);
-
-       volume_info = cifs_get_volume_info((char *)data, dev_name, is_smb3);
-       if (IS_ERR(volume_info))
-               return ERR_CAST(volume_info);
+               cifs_info("Attempting to mount %s\n", old_ctx->UNC);
 
        cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
        if (cifs_sb == NULL) {
                root = ERR_PTR(-ENOMEM);
-               goto out_nls;
+               goto out;
        }
 
-       cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
-       if (cifs_sb->mountdata == NULL) {
+       cifs_sb->ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL);
+       if (!cifs_sb->ctx) {
                root = ERR_PTR(-ENOMEM);
-               goto out_free;
+               goto out;
+       }
+       rc = smb3_fs_context_dup(cifs_sb->ctx, old_ctx);
+       if (rc) {
+               root = ERR_PTR(rc);
+               goto out;
+       }
+
+       rc = cifs_setup_volume_info(cifs_sb->ctx);
+       if (rc) {
+               root = ERR_PTR(rc);
+               goto out;
        }
 
-       rc = cifs_setup_cifs_sb(volume_info, cifs_sb);
+       rc = cifs_setup_cifs_sb(cifs_sb);
        if (rc) {
                root = ERR_PTR(rc);
-               goto out_free;
+               goto out;
        }
 
-       rc = cifs_mount(cifs_sb, volume_info);
+       rc = cifs_mount(cifs_sb, cifs_sb->ctx);
        if (rc) {
                if (!(flags & SB_SILENT))
                        cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n",
                                 rc);
                root = ERR_PTR(rc);
-               goto out_free;
+               goto out;
        }
 
-       mnt_data.vol = volume_info;
+       mnt_data.ctx = cifs_sb->ctx;
        mnt_data.cifs_sb = cifs_sb;
        mnt_data.flags = flags;
 
@@ -839,12 +854,14 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
        if (IS_ERR(sb)) {
                root = ERR_CAST(sb);
                cifs_umount(cifs_sb);
+               cifs_sb = NULL;
                goto out;
        }
 
        if (sb->s_root) {
                cifs_dbg(FYI, "Use existing superblock\n");
                cifs_umount(cifs_sb);
+               cifs_sb = NULL;
        } else {
                rc = cifs_read_super(sb);
                if (rc) {
@@ -855,41 +872,24 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
                sb->s_flags |= SB_ACTIVE;
        }
 
-       root = cifs_get_root(volume_info, sb);
+       root = cifs_get_root(cifs_sb ? cifs_sb->ctx : old_ctx, sb);
        if (IS_ERR(root))
                goto out_super;
 
        cifs_dbg(FYI, "dentry root is: %p\n", root);
-       goto out;
+       return root;
 
 out_super:
        deactivate_locked_super(sb);
 out:
-       cifs_cleanup_volume_info(volume_info);
+       if (cifs_sb) {
+               kfree(cifs_sb->prepath);
+               smb3_cleanup_fs_context(cifs_sb->ctx);
+               kfree(cifs_sb);
+       }
        return root;
-
-out_free:
-       kfree(cifs_sb->prepath);
-       kfree(cifs_sb->mountdata);
-       kfree(cifs_sb);
-out_nls:
-       unload_nls(volume_info->local_nls);
-       goto out;
 }
 
-static struct dentry *
-smb3_do_mount(struct file_system_type *fs_type,
-             int flags, const char *dev_name, void *data)
-{
-       return cifs_smb3_do_mount(fs_type, flags, dev_name, data, true);
-}
-
-static struct dentry *
-cifs_do_mount(struct file_system_type *fs_type,
-             int flags, const char *dev_name, void *data)
-{
-       return cifs_smb3_do_mount(fs_type, flags, dev_name, data, false);
-}
 
 static ssize_t
 cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
@@ -1026,7 +1026,8 @@ cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv
 struct file_system_type cifs_fs_type = {
        .owner = THIS_MODULE,
        .name = "cifs",
-       .mount = cifs_do_mount,
+       .init_fs_context = smb3_init_fs_context,
+       .parameters = smb3_fs_parameters,
        .kill_sb = cifs_kill_sb,
        .fs_flags = FS_RENAME_DOES_D_MOVE,
 };
@@ -1035,7 +1036,8 @@ MODULE_ALIAS_FS("cifs");
 static struct file_system_type smb3_fs_type = {
        .owner = THIS_MODULE,
        .name = "smb3",
-       .mount = smb3_do_mount,
+       .init_fs_context = smb3_init_fs_context,
+       .parameters = smb3_fs_parameters,
        .kill_sb = cifs_kill_sb,
        .fs_flags = FS_RENAME_DOES_D_MOVE,
 };
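
With .mount gone, both filesystem types opt in to the new VFS mount API: the kernel calls .init_fs_context, which allocates a per-mount context and installs operations that parse parameters and build the superblock. A generic sketch of that pattern under assumed illustrative names (the real entry point is smb3_init_fs_context() in fs_context.c):

#include <linux/fs_context.h>
#include <linux/slab.h>

struct example_fs_ctx {
	unsigned int rsize;		/* illustrative per-mount option */
};

static int example_get_tree(struct fs_context *fc)
{
	/* build or reuse a superblock from fc->fs_private here */
	return 0;
}

static void example_free(struct fs_context *fc)
{
	kfree(fc->fs_private);
}

static const struct fs_context_operations example_ctx_ops = {
	.free		= example_free,
	.get_tree	= example_get_tree,
};

static int example_init_fs_context(struct fs_context *fc)
{
	struct example_fs_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return -ENOMEM;
	fc->fs_private = ctx;
	fc->ops = &example_ctx_ops;
	return 0;
}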
@@ -1617,10 +1619,15 @@ init_cifs(void)
        if (rc)
                goto out_destroy_dfs_cache;
 #endif /* CONFIG_CIFS_UPCALL */
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       rc = cifs_genl_init();
+       if (rc)
+               goto out_register_key_type;
+#endif /* CONFIG_CIFS_SWN_UPCALL */
 
        rc = init_cifs_idmap();
        if (rc)
-               goto out_register_key_type;
+               goto out_cifs_swn_init;
 
        rc = register_filesystem(&cifs_fs_type);
        if (rc)
@@ -1636,7 +1643,11 @@ init_cifs(void)
 
 out_init_cifs_idmap:
        exit_cifs_idmap();
+out_cifs_swn_init:
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       cifs_genl_exit();
 out_register_key_type:
+#endif
 #ifdef CONFIG_CIFS_UPCALL
        exit_cifs_spnego();
 out_destroy_dfs_cache:
@@ -1673,6 +1684,9 @@ exit_cifs(void)
        unregister_filesystem(&smb3_fs_type);
        cifs_dfs_release_automount_timer();
        exit_cifs_idmap();
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       cifs_genl_exit();
+#endif
 #ifdef CONFIG_CIFS_UPCALL
        exit_cifs_spnego();
 #endif
index 905d038..2307bb0 100644 (file)
@@ -152,9 +152,13 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern void cifs_setsize(struct inode *inode, loff_t offset);
 extern int cifs_truncate_page(struct address_space *mapping, loff_t from);
 
+struct smb3_fs_context;
+extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
+                                        int flags, struct smb3_fs_context *ctx);
+
 #ifdef CONFIG_CIFS_NFSD_EXPORT
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.29"
+#define CIFS_VERSION   "2.30"
 #endif                         /* _CIFSFS_H */
index 484ec2d..50fcb65 100644 (file)
@@ -202,7 +202,7 @@ struct cifs_ses;
 struct cifs_tcon;
 struct dfs_info3_param;
 struct cifs_fattr;
-struct smb_vol;
+struct smb3_fs_context;
 struct cifs_fid;
 struct cifs_readdata;
 struct cifs_writedata;
@@ -268,9 +268,9 @@ struct smb_version_operations {
        /* negotiate to the server */
        int (*negotiate)(const unsigned int, struct cifs_ses *);
        /* set negotiated write size */
-       unsigned int (*negotiate_wsize)(struct cifs_tcon *, struct smb_vol *);
+       unsigned int (*negotiate_wsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx);
        /* set negotiated read size */
-       unsigned int (*negotiate_rsize)(struct cifs_tcon *, struct smb_vol *);
+       unsigned int (*negotiate_rsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx);
        /* setup smb session */
        int (*sess_setup)(const unsigned int, struct cifs_ses *,
                          const struct nls_table *);
@@ -456,9 +456,9 @@ struct smb_version_operations {
                        const char *, const void *, const __u16,
                        const struct nls_table *, struct cifs_sb_info *);
        struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *,
-                       const char *, u32 *);
+                       const char *, u32 *, u32);
        struct cifs_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *,
-                       const struct cifs_fid *, u32 *);
+                       const struct cifs_fid *, u32 *, u32);
        int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
                        int);
        /* writepages retry size */
@@ -530,97 +530,6 @@ struct smb_version_values {
 #define HEADER_SIZE(server) (server->vals->header_size)
 #define MAX_HEADER_SIZE(server) (server->vals->max_header_size)
 
-struct smb_vol {
-       char *username;
-       char *password;
-       char *domainname;
-       char *UNC;
-       char *iocharset;  /* local code page for mapping to and from Unicode */
-       char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
-       char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
-       kuid_t cred_uid;
-       kuid_t linux_uid;
-       kgid_t linux_gid;
-       kuid_t backupuid;
-       kgid_t backupgid;
-       umode_t file_mode;
-       umode_t dir_mode;
-       enum securityEnum sectype; /* sectype requested via mnt opts */
-       bool sign; /* was signing requested via mnt opts? */
-       bool ignore_signature:1;
-       bool retry:1;
-       bool intr:1;
-       bool setuids:1;
-       bool setuidfromacl:1;
-       bool override_uid:1;
-       bool override_gid:1;
-       bool dynperm:1;
-       bool noperm:1;
-       bool nodelete:1;
-       bool mode_ace:1;
-       bool no_psx_acl:1; /* set if posix acl support should be disabled */
-       bool cifs_acl:1;
-       bool backupuid_specified; /* mount option  backupuid  is specified */
-       bool backupgid_specified; /* mount option  backupgid  is specified */
-       bool no_xattr:1;   /* set if xattr (EA) support should be disabled*/
-       bool server_ino:1; /* use inode numbers from server ie UniqueId */
-       bool direct_io:1;
-       bool strict_io:1; /* strict cache behavior */
-       bool cache_ro:1;
-       bool cache_rw:1;
-       bool remap:1;      /* set to remap seven reserved chars in filenames */
-       bool sfu_remap:1;  /* remap seven reserved chars ala SFU */
-       bool posix_paths:1; /* unset to not ask for posix pathnames. */
-       bool no_linux_ext:1;
-       bool linux_ext:1;
-       bool sfu_emul:1;
-       bool nullauth:1;   /* attempt to authenticate with null user */
-       bool nocase:1;     /* request case insensitive filenames */
-       bool nobrl:1;      /* disable sending byte range locks to srv */
-       bool nohandlecache:1; /* disable caching dir handles if srvr probs */
-       bool mand_lock:1;  /* send mandatory not posix byte range lock reqs */
-       bool seal:1;       /* request transport encryption on share */
-       bool nodfs:1;      /* Do not request DFS, even if available */
-       bool local_lease:1; /* check leases only on local system, not remote */
-       bool noblocksnd:1;
-       bool noautotune:1;
-       bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
-       bool no_lease:1;     /* disable requesting leases */
-       bool fsc:1;     /* enable fscache */
-       bool mfsymlinks:1; /* use Minshall+French Symlinks */
-       bool multiuser:1;
-       bool rwpidforward:1; /* pid forward for read/write operations */
-       bool nosharesock:1;
-       bool persistent:1;
-       bool nopersistent:1;
-       bool resilient:1; /* noresilient not required since not forced for CA */
-       bool domainauto:1;
-       bool rdma:1;
-       bool multichannel:1;
-       bool use_client_guid:1;
-       /* reuse existing guid for multichannel */
-       u8 client_guid[SMB2_CLIENT_GUID_SIZE];
-       unsigned int bsize;
-       unsigned int rsize;
-       unsigned int wsize;
-       unsigned int min_offload;
-       bool sockopt_tcp_nodelay:1;
-       unsigned long actimeo; /* attribute cache timeout (jiffies) */
-       struct smb_version_operations *ops;
-       struct smb_version_values *vals;
-       char *prepath;
-       struct sockaddr_storage dstaddr; /* destination address */
-       struct sockaddr_storage srcaddr; /* allow binding to a local IP */
-       struct nls_table *local_nls;
-       unsigned int echo_interval; /* echo interval in secs */
-       __u64 snapshot_time; /* needed for timewarp tokens */
-       __u32 handle_timeout; /* persistent and durable handle timeout in ms */
-       unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
-       unsigned int max_channels;
-       __u16 compression; /* compression algorithm 0xFFFF default 0=disabled */
-       bool rootfs:1; /* if it's a SMB root file system */
-};
-
 /**
  * CIFS superblock mount flags (mnt_cifs_flags) to consider when
  * trying to reuse existing superblock for a new mount
@@ -649,7 +558,7 @@ struct smb_vol {
 
 struct cifs_mnt_data {
        struct cifs_sb_info *cifs_sb;
-       struct smb_vol *vol;
+       struct smb3_fs_context *ctx;
        int flags;
 };
 
@@ -778,6 +687,10 @@ struct TCP_Server_Info {
        int nr_targets;
        bool noblockcnt; /* use non-blocking connect() */
        bool is_channel; /* if a session channel */
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       bool use_swn_dstaddr;
+       struct sockaddr_storage swn_dstaddr;
+#endif
 };
 
 struct cifs_credits {
@@ -1177,6 +1090,9 @@ struct cifs_tcon {
        int remap:2;
        struct list_head ulist; /* cache update list */
 #endif
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       bool use_witness:1; /* use witness protocol */
+#endif
 };
 
 /*
index 593d826..64fe5a4 100644 (file)
 #define SYNCHRONIZE           0x00100000  /* The file handle can be waited on to */
                                          /* synchronize with the completion  */
                                          /* of an input/output request       */
+#define SYSTEM_SECURITY       0x01000000  /* The system access control list   */
+                                         /* can be read and changed          */
 #define GENERIC_ALL           0x10000000
 #define GENERIC_EXECUTE       0x20000000
 #define GENERIC_WRITE         0x40000000
                                | WRITE_OWNER | SYNCHRONIZE)
 #define SET_FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
                                | FILE_READ_EA | FILE_WRITE_EA \
-                               | FILE_DELETE_CHILD | FILE_READ_ATTRIBUTES \
+                               | FILE_READ_ATTRIBUTES \
                                | FILE_WRITE_ATTRIBUTES \
                                | DELETE | READ_CONTROL | WRITE_DAC \
                                | WRITE_OWNER | SYNCHRONIZE)
index 24c6f36..340ff81 100644 (file)
@@ -27,8 +27,8 @@
 #endif
 
 struct statfs;
-struct smb_vol;
 struct smb_rqst;
+struct smb3_fs_context;
 
 /*
  *****************************************************************
@@ -72,14 +72,13 @@ extern void exit_cifs_spnego(void);
 extern char *build_path_from_dentry(struct dentry *);
 extern char *build_path_from_dentry_optional_prefix(struct dentry *direntry,
                                                    bool prefix);
-extern char *cifs_build_path_to_root(struct smb_vol *vol,
+extern char *cifs_build_path_to_root(struct smb3_fs_context *ctx,
                                     struct cifs_sb_info *cifs_sb,
                                     struct cifs_tcon *tcon,
                                     int add_treename);
 extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
 extern char *cifs_compose_mount_options(const char *sb_mountdata,
-               const char *fullpath, const struct dfs_info3_param *ref,
-               char **devname);
+               const char *fullpath, const struct dfs_info3_param *ref);
 /* extern void renew_parental_timestamps(struct dentry *direntry);*/
 extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
                                        struct TCP_Server_Info *server);
@@ -89,6 +88,7 @@ extern void cifs_mid_q_entry_release(struct mid_q_entry *midEntry);
 extern void cifs_wake_up_task(struct mid_q_entry *mid);
 extern int cifs_handle_standard(struct TCP_Server_Info *server,
                                struct mid_q_entry *mid);
+extern int smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx);
 extern bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs);
 extern int cifs_discard_remaining_data(struct TCP_Server_Info *server);
 extern int cifs_call_async(struct TCP_Server_Info *server,
@@ -215,12 +215,12 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
                              struct cifs_fattr *fattr, struct inode *inode,
                              bool get_mode_from_special_sid,
                              const char *path, const struct cifs_fid *pfid);
-extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64,
-                                       kuid_t, kgid_t);
+extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
+                                       kuid_t uid, kgid_t gid);
 extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
-                                       const char *, u32 *);
+                                     const char *, u32 *, u32);
 extern struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *,
-                                               const struct cifs_fid *, u32 *);
+                               const struct cifs_fid *, u32 *, u32);
 extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
                                const char *, int);
 extern unsigned int setup_authusers_ACE(struct cifs_ace *pace);
@@ -234,13 +234,9 @@ extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
                                        struct page *page,
                                        unsigned int page_offset,
                                        unsigned int to_read);
-extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
-                              struct cifs_sb_info *cifs_sb);
+extern int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb);
 extern int cifs_match_super(struct super_block *, void *);
-extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info);
-extern struct smb_vol *cifs_get_volume_info(char *mount_data,
-                                           const char *devname, bool is_smb3);
-extern int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol);
+extern int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx);
 extern void cifs_umount(struct cifs_sb_info *);
 extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon);
 extern void cifs_reopen_persistent_handles(struct cifs_tcon *tcon);
@@ -256,7 +252,7 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
                                         struct tcon_link *tlink,
                                         struct cifs_pending_open *open);
 extern void cifs_del_pending_open(struct cifs_pending_open *open);
-extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb_vol *vol);
+extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
 extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
                                 int from_reconnect);
 extern void cifs_put_tcon(struct cifs_tcon *tcon);
@@ -332,7 +328,7 @@ extern int parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
                               const char *searchName, bool is_unicode);
 extern void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
                                 struct cifs_sb_info *cifs_sb,
-                                struct smb_vol *vol);
+                                struct smb3_fs_context *ctx);
 extern int CIFSSMBQFSInfo(const unsigned int xid, struct cifs_tcon *tcon,
                        struct kstatfs *FSData);
 extern int SMBOldQFSInfo(const unsigned int xid, struct cifs_tcon *tcon,
@@ -553,18 +549,15 @@ extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
                        unsigned char *p24);
 
 extern int
-cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
-                      const char *devname, bool is_smb3);
-extern void
-cifs_cleanup_volume_info_contents(struct smb_vol *volume_info);
+cifs_setup_volume_info(struct smb3_fs_context *ctx);
 
 extern struct TCP_Server_Info *
-cifs_find_tcp_session(struct smb_vol *vol);
+cifs_find_tcp_session(struct smb3_fs_context *ctx);
 
 extern void cifs_put_smb_ses(struct cifs_ses *ses);
 
 extern struct cifs_ses *
-cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info);
+cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx);
 
 void cifs_readdata_release(struct kref *refcount);
 int cifs_async_readv(struct cifs_readdata *rdata);
@@ -604,9 +597,7 @@ extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page,
                                unsigned int *len, unsigned int *offset);
 struct cifs_chan *
 cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server);
-int cifs_try_adding_channels(struct cifs_ses *ses);
-int cifs_ses_add_channel(struct cifs_ses *ses,
-                               struct cifs_server_iface *iface);
+int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses);
 bool is_server_using_iface(struct TCP_Server_Info *server,
                           struct cifs_server_iface *iface);
 bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface);
@@ -620,6 +611,8 @@ int smb2_parse_query_directory(struct cifs_tcon *tcon, struct kvec *rsp_iov,
 struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server);
 void cifs_put_tcp_super(struct super_block *sb);
 int update_super_prepath(struct cifs_tcon *tcon, char *prefix);
+char *extract_hostname(const char *unc);
+char *extract_sharename(const char *unc);
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
 static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
index 44f9cce..b9df855 100644 (file)
@@ -62,6 +62,9 @@
 #include "dfs_cache.h"
 #endif
 #include "fs_context.h"
+#ifdef CONFIG_CIFS_SWN_UPCALL
+#include "cifs_swn.h"
+#endif
 
 extern mempool_t *cifs_req_poolp;
 extern bool disable_legacy_dialects;
@@ -73,218 +76,10 @@ extern bool disable_legacy_dialects;
 /* Drop the connection to not overload the server */
 #define NUM_STATUS_IO_TIMEOUT   5
 
-enum {
-       /* Mount options that take no arguments */
-       Opt_user_xattr, Opt_nouser_xattr,
-       Opt_forceuid, Opt_noforceuid,
-       Opt_forcegid, Opt_noforcegid,
-       Opt_noblocksend, Opt_noautotune, Opt_nolease,
-       Opt_hard, Opt_soft, Opt_perm, Opt_noperm, Opt_nodelete,
-       Opt_mapposix, Opt_nomapposix,
-       Opt_mapchars, Opt_nomapchars, Opt_sfu,
-       Opt_nosfu, Opt_nodfs, Opt_posixpaths,
-       Opt_noposixpaths, Opt_nounix, Opt_unix,
-       Opt_nocase,
-       Opt_brl, Opt_nobrl,
-       Opt_handlecache, Opt_nohandlecache,
-       Opt_forcemandatorylock, Opt_setuidfromacl, Opt_setuids,
-       Opt_nosetuids, Opt_dynperm, Opt_nodynperm,
-       Opt_nohard, Opt_nosoft,
-       Opt_nointr, Opt_intr,
-       Opt_nostrictsync, Opt_strictsync,
-       Opt_serverino, Opt_noserverino,
-       Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl,
-       Opt_acl, Opt_noacl, Opt_locallease,
-       Opt_sign, Opt_ignore_signature, Opt_seal, Opt_noac,
-       Opt_fsc, Opt_mfsymlinks,
-       Opt_multiuser, Opt_sloppy, Opt_nosharesock,
-       Opt_persistent, Opt_nopersistent,
-       Opt_resilient, Opt_noresilient,
-       Opt_domainauto, Opt_rdma, Opt_modesid, Opt_rootfs,
-       Opt_multichannel, Opt_nomultichannel,
-       Opt_compress,
-
-       /* Mount options which take numeric value */
-       Opt_backupuid, Opt_backupgid, Opt_uid,
-       Opt_cruid, Opt_gid, Opt_file_mode,
-       Opt_dirmode, Opt_port,
-       Opt_min_enc_offload,
-       Opt_blocksize, Opt_rsize, Opt_wsize, Opt_actimeo,
-       Opt_echo_interval, Opt_max_credits, Opt_handletimeout,
-       Opt_snapshot, Opt_max_channels,
-
-       /* Mount options which take string value */
-       Opt_user, Opt_pass, Opt_ip,
-       Opt_domain, Opt_srcaddr, Opt_iocharset,
-       Opt_netbiosname, Opt_servern,
-       Opt_ver, Opt_vers, Opt_sec, Opt_cache,
-
-       /* Mount options to be ignored */
-       Opt_ignore,
-
-       /* Options which could be blank */
-       Opt_blank_pass,
-       Opt_blank_user,
-       Opt_blank_ip,
-
-       Opt_err
-};
-
-static const match_table_t cifs_mount_option_tokens = {
-
-       { Opt_user_xattr, "user_xattr" },
-       { Opt_nouser_xattr, "nouser_xattr" },
-       { Opt_forceuid, "forceuid" },
-       { Opt_noforceuid, "noforceuid" },
-       { Opt_forcegid, "forcegid" },
-       { Opt_noforcegid, "noforcegid" },
-       { Opt_noblocksend, "noblocksend" },
-       { Opt_noautotune, "noautotune" },
-       { Opt_nolease, "nolease" },
-       { Opt_hard, "hard" },
-       { Opt_soft, "soft" },
-       { Opt_perm, "perm" },
-       { Opt_noperm, "noperm" },
-       { Opt_nodelete, "nodelete" },
-       { Opt_mapchars, "mapchars" }, /* SFU style */
-       { Opt_nomapchars, "nomapchars" },
-       { Opt_mapposix, "mapposix" }, /* SFM style */
-       { Opt_nomapposix, "nomapposix" },
-       { Opt_sfu, "sfu" },
-       { Opt_nosfu, "nosfu" },
-       { Opt_nodfs, "nodfs" },
-       { Opt_posixpaths, "posixpaths" },
-       { Opt_noposixpaths, "noposixpaths" },
-       { Opt_nounix, "nounix" },
-       { Opt_nounix, "nolinux" },
-       { Opt_nounix, "noposix" },
-       { Opt_unix, "unix" },
-       { Opt_unix, "linux" },
-       { Opt_unix, "posix" },
-       { Opt_nocase, "nocase" },
-       { Opt_nocase, "ignorecase" },
-       { Opt_brl, "brl" },
-       { Opt_nobrl, "nobrl" },
-       { Opt_handlecache, "handlecache" },
-       { Opt_nohandlecache, "nohandlecache" },
-       { Opt_nobrl, "nolock" },
-       { Opt_forcemandatorylock, "forcemandatorylock" },
-       { Opt_forcemandatorylock, "forcemand" },
-       { Opt_setuids, "setuids" },
-       { Opt_nosetuids, "nosetuids" },
-       { Opt_setuidfromacl, "idsfromsid" },
-       { Opt_dynperm, "dynperm" },
-       { Opt_nodynperm, "nodynperm" },
-       { Opt_nohard, "nohard" },
-       { Opt_nosoft, "nosoft" },
-       { Opt_nointr, "nointr" },
-       { Opt_intr, "intr" },
-       { Opt_nostrictsync, "nostrictsync" },
-       { Opt_strictsync, "strictsync" },
-       { Opt_serverino, "serverino" },
-       { Opt_noserverino, "noserverino" },
-       { Opt_rwpidforward, "rwpidforward" },
-       { Opt_modesid, "modefromsid" },
-       { Opt_cifsacl, "cifsacl" },
-       { Opt_nocifsacl, "nocifsacl" },
-       { Opt_acl, "acl" },
-       { Opt_noacl, "noacl" },
-       { Opt_locallease, "locallease" },
-       { Opt_sign, "sign" },
-       { Opt_ignore_signature, "signloosely" },
-       { Opt_seal, "seal" },
-       { Opt_noac, "noac" },
-       { Opt_fsc, "fsc" },
-       { Opt_mfsymlinks, "mfsymlinks" },
-       { Opt_multiuser, "multiuser" },
-       { Opt_sloppy, "sloppy" },
-       { Opt_nosharesock, "nosharesock" },
-       { Opt_persistent, "persistenthandles"},
-       { Opt_nopersistent, "nopersistenthandles"},
-       { Opt_resilient, "resilienthandles"},
-       { Opt_noresilient, "noresilienthandles"},
-       { Opt_domainauto, "domainauto"},
-       { Opt_rdma, "rdma"},
-       { Opt_multichannel, "multichannel" },
-       { Opt_nomultichannel, "nomultichannel" },
-
-       { Opt_backupuid, "backupuid=%s" },
-       { Opt_backupgid, "backupgid=%s" },
-       { Opt_uid, "uid=%s" },
-       { Opt_cruid, "cruid=%s" },
-       { Opt_gid, "gid=%s" },
-       { Opt_file_mode, "file_mode=%s" },
-       { Opt_dirmode, "dirmode=%s" },
-       { Opt_dirmode, "dir_mode=%s" },
-       { Opt_port, "port=%s" },
-       { Opt_min_enc_offload, "esize=%s" },
-       { Opt_blocksize, "bsize=%s" },
-       { Opt_rsize, "rsize=%s" },
-       { Opt_wsize, "wsize=%s" },
-       { Opt_actimeo, "actimeo=%s" },
-       { Opt_handletimeout, "handletimeout=%s" },
-       { Opt_echo_interval, "echo_interval=%s" },
-       { Opt_max_credits, "max_credits=%s" },
-       { Opt_snapshot, "snapshot=%s" },
-       { Opt_max_channels, "max_channels=%s" },
-       { Opt_compress, "compress=%s" },
-
-       { Opt_blank_user, "user=" },
-       { Opt_blank_user, "username=" },
-       { Opt_user, "user=%s" },
-       { Opt_user, "username=%s" },
-       { Opt_blank_pass, "pass=" },
-       { Opt_blank_pass, "password=" },
-       { Opt_pass, "pass=%s" },
-       { Opt_pass, "password=%s" },
-       { Opt_blank_ip, "ip=" },
-       { Opt_blank_ip, "addr=" },
-       { Opt_ip, "ip=%s" },
-       { Opt_ip, "addr=%s" },
-       { Opt_ignore, "unc=%s" },
-       { Opt_ignore, "target=%s" },
-       { Opt_ignore, "path=%s" },
-       { Opt_domain, "dom=%s" },
-       { Opt_domain, "domain=%s" },
-       { Opt_domain, "workgroup=%s" },
-       { Opt_srcaddr, "srcaddr=%s" },
-       { Opt_ignore, "prefixpath=%s" },
-       { Opt_iocharset, "iocharset=%s" },
-       { Opt_netbiosname, "netbiosname=%s" },
-       { Opt_servern, "servern=%s" },
-       { Opt_ver, "ver=%s" },
-       { Opt_vers, "vers=%s" },
-       { Opt_sec, "sec=%s" },
-       { Opt_cache, "cache=%s" },
-
-       { Opt_ignore, "cred" },
-       { Opt_ignore, "credentials" },
-       { Opt_ignore, "cred=%s" },
-       { Opt_ignore, "credentials=%s" },
-       { Opt_ignore, "guest" },
-       { Opt_ignore, "rw" },
-       { Opt_ignore, "ro" },
-       { Opt_ignore, "suid" },
-       { Opt_ignore, "nosuid" },
-       { Opt_ignore, "exec" },
-       { Opt_ignore, "noexec" },
-       { Opt_ignore, "nodev" },
-       { Opt_ignore, "noauto" },
-       { Opt_ignore, "dev" },
-       { Opt_ignore, "mand" },
-       { Opt_ignore, "nomand" },
-       { Opt_ignore, "relatime" },
-       { Opt_ignore, "_netdev" },
-       { Opt_rootfs, "rootfs" },
-
-       { Opt_err, NULL }
-};
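
The whole token table above moves to the new fs_parser infrastructure: the file_system_type definitions earlier in this patch point .parameters at smb3_fs_parameters instead. A hedged sketch of how a few of the same options look as a struct fs_parameter_spec array (subset with made-up enum names; the real table lives in fs/cifs/fs_context.c):

#include <linux/fs_parser.h>

enum { Opt_ex_forceuid, Opt_ex_seal, Opt_ex_rsize, Opt_ex_wsize, Opt_ex_iocharset };

/* Illustrative subset only. */
static const struct fs_parameter_spec example_smb3_params[] = {
	fsparam_flag_no("forceuid", Opt_ex_forceuid),	/* forceuid / noforceuid */
	fsparam_flag("seal",        Opt_ex_seal),
	fsparam_u32("rsize",        Opt_ex_rsize),
	fsparam_u32("wsize",        Opt_ex_wsize),
	fsparam_string("iocharset", Opt_ex_iocharset),
	{}
};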
-
 static int ip_connect(struct TCP_Server_Info *server);
 static int generic_ip_connect(struct TCP_Server_Info *server);
 static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
 static void cifs_prune_tlinks(struct work_struct *work);
-static char *extract_hostname(const char *unc);
 
 /*
  * Resolve hostname and set ip addr in tcp ses. Useful for hostnames that may
@@ -293,7 +88,7 @@ static char *extract_hostname(const char *unc);
  * This should be called with server->srv_mutex held.
  */
 #ifdef CONFIG_CIFS_DFS_UPCALL
-static int reconn_set_ipaddr(struct TCP_Server_Info *server)
+static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
 {
        int rc;
        int len;
@@ -328,14 +123,7 @@ static int reconn_set_ipaddr(struct TCP_Server_Info *server)
 
        return !rc ? -1 : 0;
 }
-#else
-static inline int reconn_set_ipaddr(struct TCP_Server_Info *server)
-{
-       return 0;
-}
-#endif
 
-#ifdef CONFIG_CIFS_DFS_UPCALL
 /* These functions must be called with server->srv_mutex held */
 static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
                                       struct cifs_sb_info *cifs_sb,
@@ -343,6 +131,7 @@ static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
                                       struct dfs_cache_tgt_iterator **tgt_it)
 {
        const char *name;
+       int rc;
 
        if (!cifs_sb || !cifs_sb->origin_fullpath)
                return;
@@ -366,6 +155,13 @@ static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
                cifs_dbg(FYI,
                         "%s: failed to extract hostname from target: %ld\n",
                         __func__, PTR_ERR(server->hostname));
+               return;
+       }
+
+       rc = reconn_set_ipaddr_from_hostname(server);
+       if (rc) {
+               cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
+                        __func__, rc);
        }
 }
 
@@ -517,19 +313,25 @@ cifs_reconnect(struct TCP_Server_Info *server)
                try_to_freeze();
 
                mutex_lock(&server->srv_mutex);
+
+#ifdef CONFIG_CIFS_SWN_UPCALL
+               if (server->use_swn_dstaddr) {
+                       server->dstaddr = server->swn_dstaddr;
+               } else {
+#endif
+
 #ifdef CONFIG_CIFS_DFS_UPCALL
-               /*
-                * Set up next DFS target server (if any) for reconnect. If DFS
-                * feature is disabled, then we will retry last server we
-                * connected to before.
-                */
-               reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
+                       /*
+                        * Set up next DFS target server (if any) for reconnect. If DFS
+                        * feature is disabled, then we will retry last server we
+                        * connected to before.
+                        */
+                       reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
 #endif
-               rc = reconn_set_ipaddr(server);
-               if (rc) {
-                       cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
-                                __func__, rc);
+
+#ifdef CONFIG_CIFS_SWN_UPCALL
                }
+#endif
 
                if (cifs_rdma_enabled(server))
                        rc = smbd_reconnect(server);
@@ -546,6 +348,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
                        if (server->tcpStatus != CifsExiting)
                                server->tcpStatus = CifsNeedNegotiate;
                        spin_unlock(&GlobalMid_Lock);
+#ifdef CONFIG_CIFS_SWN_UPCALL
+                       server->use_swn_dstaddr = false;
+#endif
                        mutex_unlock(&server->srv_mutex);
                }
        } while (server->tcpStatus == CifsNeedReconnect);
@@ -610,6 +415,11 @@ cifs_echo_request(struct work_struct *work)
                cifs_dbg(FYI, "Unable to send echo request to server: %s\n",
                         server->hostname);
 
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       /* Check witness registrations */
+       cifs_swn_check();
+#endif
+
 requeue_echo:
        queue_delayed_work(cifsiod_wq, &server->echo, server->echo_interval);
 }
@@ -1036,6 +846,7 @@ static void
 smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
 {
        struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buffer;
+       int scredits = server->credits;
 
        /*
         * SMB1 does not use credits.
@@ -1048,6 +859,13 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
                server->credits += le16_to_cpu(shdr->CreditRequest);
                spin_unlock(&server->req_lock);
                wake_up(&server->request_q);
+
+               trace_smb3_add_credits(server->CurrentMid,
+                               server->hostname, scredits,
+                               le16_to_cpu(shdr->CreditRequest));
+               cifs_server_dbg(FYI, "%s: added %u credits total=%d\n",
+                               __func__, le16_to_cpu(shdr->CreditRequest),
+                               scredits);
        }
 }
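
The added trace captures the credit count before the grant so both the delta and the resulting total can be logged. A userspace analogue of the locking pattern (a POSIX-threads sketch, not the kernel code): bump the counter under the request lock, then wake anything queued waiting for credits, mirroring server->req_lock and server->request_q.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t request_q = PTHREAD_COND_INITIALIZER;
static int credits;

static void add_credits(int credit_request)
{
	int scredits;

	pthread_mutex_lock(&req_lock);
	scredits = credits;			/* snapshot for the log */
	credits += credit_request;
	pthread_mutex_unlock(&req_lock);
	pthread_cond_broadcast(&request_q);	/* wake queued requests */
	printf("added %d credits total=%d\n",
	       credit_request, scredits + credit_request);
}

int main(void)
{
	add_credits(32);
	return 0;
}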
 
@@ -1232,1051 +1050,9 @@ next_pdu:
        module_put_and_exit(0);
 }
 
-/* extract the host portion of the UNC string */
-static char *
-extract_hostname(const char *unc)
-{
-       const char *src;
-       char *dst, *delim;
-       unsigned int len;
-
-       /* skip the doubled '\\' chars at the beginning of the string */
-       /* BB: check validity of these bytes? */
-       if (strlen(unc) < 3)
-               return ERR_PTR(-EINVAL);
-       for (src = unc; *src && *src == '\\'; src++)
-               ;
-       if (!*src)
-               return ERR_PTR(-EINVAL);
-
-       /* delimiter between hostname and sharename is always '\\' now */
-       delim = strchr(src, '\\');
-       if (!delim)
-               return ERR_PTR(-EINVAL);
-
-       len = delim - src;
-       dst = kmalloc((len + 1), GFP_KERNEL);
-       if (dst == NULL)
-               return ERR_PTR(-ENOMEM);
-
-       memcpy(dst, src, len);
-       dst[len] = '\0';
-
-       return dst;
-}
-
-static int get_option_ul(substring_t args[], unsigned long *option)
-{
-       int rc;
-       char *string;
-
-       string = match_strdup(args);
-       if (string == NULL)
-               return -ENOMEM;
-       rc = kstrtoul(string, 0, option);
-       kfree(string);
-
-       return rc;
-}
-
-static int get_option_uid(substring_t args[], kuid_t *result)
-{
-       unsigned long value;
-       kuid_t uid;
-       int rc;
-
-       rc = get_option_ul(args, &value);
-       if (rc)
-               return rc;
-
-       uid = make_kuid(current_user_ns(), value);
-       if (!uid_valid(uid))
-               return -EINVAL;
-
-       *result = uid;
-       return 0;
-}
-
-static int get_option_gid(substring_t args[], kgid_t *result)
-{
-       unsigned long value;
-       kgid_t gid;
-       int rc;
-
-       rc = get_option_ul(args, &value);
-       if (rc)
-               return rc;
-
-       gid = make_kgid(current_user_ns(), value);
-       if (!gid_valid(gid))
-               return -EINVAL;
-
-       *result = gid;
-       return 0;
-}
-
-/*
- * Parse a devname into substrings and populate the vol->UNC and vol->prepath
- * fields with the result. Returns 0 on success and an error otherwise.
- */
-static int
-cifs_parse_devname(const char *devname, struct smb_vol *vol)
-{
-       char *pos;
-       const char *delims = "/\\";
-       size_t len;
-
-       if (unlikely(!devname || !*devname)) {
-               cifs_dbg(VFS, "Device name not specified\n");
-               return -EINVAL;
-       }
-
-       /* make sure we have a valid UNC double delimiter prefix */
-       len = strspn(devname, delims);
-       if (len != 2)
-               return -EINVAL;
-
-       /* find delimiter between host and sharename */
-       pos = strpbrk(devname + 2, delims);
-       if (!pos)
-               return -EINVAL;
-
-       /* skip past delimiter */
-       ++pos;
-
-       /* now go until next delimiter or end of string */
-       len = strcspn(pos, delims);
-
-       /* move "pos" up to delimiter or NULL */
-       pos += len;
-       vol->UNC = kstrndup(devname, pos - devname, GFP_KERNEL);
-       if (!vol->UNC)
-               return -ENOMEM;
-
-       convert_delimiter(vol->UNC, '\\');
-
-       /* skip any delimiter */
-       if (*pos == '/' || *pos == '\\')
-               pos++;
-
-       /* if "pos" now points at the terminating NUL, there is no prepath */
-       if (!*pos)
-               return 0;
-
-       vol->prepath = kstrdup(pos, GFP_KERNEL);
-       if (!vol->prepath)
-               return -ENOMEM;
-
-       return 0;
-}
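
This parsing moves to smb3_parse_devname() (declared in cifsproto.h earlier in this patch). A standalone walk-through of the same split, showing how a source such as //srv1/share/dir1/dir2 becomes a backslash-delimited UNC plus a prepath (userspace sketch, not the kernel code):

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *devname = "//srv1/share/dir1/dir2";
	const char *delims = "/\\";
	const char *pos;
	char unc[64], *p;
	size_t len;

	if (strspn(devname, delims) != 2)	/* need the double delimiter */
		return 1;
	pos = strpbrk(devname + 2, delims);	/* host/share separator */
	if (!pos)
		return 1;
	pos++;					/* skip past the delimiter */
	pos += strcspn(pos, delims);		/* end of the share name */

	len = pos - devname;
	memcpy(unc, devname, len);
	unc[len] = '\0';
	for (p = unc; *p; p++)			/* convert_delimiter(unc, '\\') */
		if (*p == '/')
			*p = '\\';

	if (*pos == '/' || *pos == '\\')	/* skip any delimiter */
		pos++;
	printf("UNC=%s prepath=%s\n", unc, *pos ? pos : "(none)");
	return 0;	/* UNC=\\srv1\share prepath=dir1/dir2 */
}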
-
-static int
-cifs_parse_mount_options(const char *mountdata, const char *devname,
-                        struct smb_vol *vol, bool is_smb3)
-{
-       char *data, *end;
-       char *mountdata_copy = NULL, *options;
-       unsigned int  temp_len, i, j;
-       char separator[2];
-       short int override_uid = -1;
-       short int override_gid = -1;
-       bool uid_specified = false;
-       bool gid_specified = false;
-       bool sloppy = false;
-       char *invalid = NULL;
-       char *nodename = utsname()->nodename;
-       char *string = NULL;
-       char *tmp_end, *value;
-       char delim;
-       bool got_ip = false;
-       bool got_version = false;
-       unsigned short port = 0;
-       struct sockaddr *dstaddr = (struct sockaddr *)&vol->dstaddr;
-
-       separator[0] = ',';
-       separator[1] = 0;
-       delim = separator[0];
-
-       /* ensure we always start with zeroed-out smb_vol */
-       memset(vol, 0, sizeof(*vol));
-
-       /*
-        * does not have to be perfect mapping since field is
-        * informational, only used for servers that do not support
-        * port 445 and it can be overridden at mount time
-        */
-       memset(vol->source_rfc1001_name, 0x20, RFC1001_NAME_LEN);
-       for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++)
-               vol->source_rfc1001_name[i] = toupper(nodename[i]);
-
-       vol->source_rfc1001_name[RFC1001_NAME_LEN] = 0;
-       /* null target name indicates to use *SMBSERVER default called name
-          if we end up sending RFC1001 session initialize */
-       vol->target_rfc1001_name[0] = 0;
-       vol->cred_uid = current_uid();
-       vol->linux_uid = current_uid();
-       vol->linux_gid = current_gid();
-       vol->bsize = 1024 * 1024; /* can improve cp performance significantly */
-       /*
-        * default to SFM style remapping of seven reserved characters
-        * unless user overrides it or we negotiate CIFS POSIX where
-        * it is unnecessary.  Can not simultaneously use more than one mapping
-        * since then readdir could list files that open could not open
-        */
-       vol->remap = true;
-
-       /* default to only allowing write access to owner of the mount */
-       vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
-
-       /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
-       /* default is always to request posix paths. */
-       vol->posix_paths = 1;
-       /* default to using server inode numbers where available */
-       vol->server_ino = 1;
-
-       /* default is to use strict cifs caching semantics */
-       vol->strict_io = true;
-
-       vol->actimeo = CIFS_DEF_ACTIMEO;
-
-       /* Most clients set timeout to 0, which allows the server to use its default */
-       vol->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
-
-       /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */
-       vol->ops = &smb30_operations;
-       vol->vals = &smbdefault_values;
-
-       vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
-
-       /* default to no multichannel (single server connection) */
-       vol->multichannel = false;
-       vol->max_channels = 1;
-
-       if (!mountdata)
-               goto cifs_parse_mount_err;
-
-       mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
-       if (!mountdata_copy)
-               goto cifs_parse_mount_err;
-
-       options = mountdata_copy;
-       end = options + strlen(options);
-
-       if (strncmp(options, "sep=", 4) == 0) {
-               if (options[4] != 0) {
-                       separator[0] = options[4];
-                       options += 5;
-               } else {
-                       cifs_dbg(FYI, "Null separator not allowed\n");
-               }
-       }
-       vol->backupuid_specified = false; /* no backup intent for a user */
-       vol->backupgid_specified = false; /* no backup intent for a group */
-
-       switch (cifs_parse_devname(devname, vol)) {
-       case 0:
-               break;
-       case -ENOMEM:
-               cifs_dbg(VFS, "Unable to allocate memory for devname\n");
-               goto cifs_parse_mount_err;
-       case -EINVAL:
-               cifs_dbg(VFS, "Malformed UNC in devname\n");
-               goto cifs_parse_mount_err;
-       default:
-               cifs_dbg(VFS, "Unknown error parsing devname\n");
-               goto cifs_parse_mount_err;
-       }
-
-       while ((data = strsep(&options, separator)) != NULL) {
-               substring_t args[MAX_OPT_ARGS];
-               unsigned long option;
-               int token;
-
-               if (!*data)
-                       continue;
-
-               token = match_token(data, cifs_mount_option_tokens, args);
-
-               switch (token) {
-
-               /* Ignore the following */
-               case Opt_ignore:
-                       break;
-
-               /* Boolean values */
-               case Opt_user_xattr:
-                       vol->no_xattr = 0;
-                       break;
-               case Opt_nouser_xattr:
-                       vol->no_xattr = 1;
-                       break;
-               case Opt_forceuid:
-                       override_uid = 1;
-                       break;
-               case Opt_noforceuid:
-                       override_uid = 0;
-                       break;
-               case Opt_forcegid:
-                       override_gid = 1;
-                       break;
-               case Opt_noforcegid:
-                       override_gid = 0;
-                       break;
-               case Opt_noblocksend:
-                       vol->noblocksnd = 1;
-                       break;
-               case Opt_noautotune:
-                       vol->noautotune = 1;
-                       break;
-               case Opt_nolease:
-                       vol->no_lease = 1;
-                       break;
-               case Opt_hard:
-                       vol->retry = 1;
-                       break;
-               case Opt_soft:
-                       vol->retry = 0;
-                       break;
-               case Opt_perm:
-                       vol->noperm = 0;
-                       break;
-               case Opt_noperm:
-                       vol->noperm = 1;
-                       break;
-               case Opt_nodelete:
-                       vol->nodelete = 1;
-                       break;
-               case Opt_mapchars:
-                       vol->sfu_remap = true;
-                       vol->remap = false; /* disable SFM mapping */
-                       break;
-               case Opt_nomapchars:
-                       vol->sfu_remap = false;
-                       break;
-               case Opt_mapposix:
-                       vol->remap = true;
-                       vol->sfu_remap = false; /* disable SFU mapping */
-                       break;
-               case Opt_nomapposix:
-                       vol->remap = false;
-                       break;
-               case Opt_sfu:
-                       vol->sfu_emul = 1;
-                       break;
-               case Opt_nosfu:
-                       vol->sfu_emul = 0;
-                       break;
-               case Opt_nodfs:
-                       vol->nodfs = 1;
-                       break;
-               case Opt_rootfs:
-#ifdef CONFIG_CIFS_ROOT
-                       vol->rootfs = true;
-#endif
-                       break;
-               case Opt_posixpaths:
-                       vol->posix_paths = 1;
-                       break;
-               case Opt_noposixpaths:
-                       vol->posix_paths = 0;
-                       break;
-               case Opt_nounix:
-                       if (vol->linux_ext)
-                               cifs_dbg(VFS,
-                                       "conflicting unix mount options\n");
-                       vol->no_linux_ext = 1;
-                       break;
-               case Opt_unix:
-                       if (vol->no_linux_ext)
-                               cifs_dbg(VFS,
-                                       "conflicting unix mount options\n");
-                       vol->linux_ext = 1;
-                       break;
-               case Opt_nocase:
-                       vol->nocase = 1;
-                       break;
-               case Opt_brl:
-                       vol->nobrl =  0;
-                       break;
-               case Opt_nobrl:
-                       vol->nobrl =  1;
-                       /*
-                        * turn off mandatory locking in mode
-                        * if remote locking is turned off since the
-                        * local vfs will do advisory locking
-                        */
-                       if (vol->file_mode ==
-                               (S_IALLUGO & ~(S_ISUID | S_IXGRP)))
-                               vol->file_mode = S_IALLUGO;
-                       break;
-               case Opt_nohandlecache:
-                       vol->nohandlecache = 1;
-                       break;
-               case Opt_handlecache:
-                       vol->nohandlecache = 0;
-                       break;
-               case Opt_forcemandatorylock:
-                       vol->mand_lock = 1;
-                       break;
-               case Opt_setuids:
-                       vol->setuids = 1;
-                       break;
-               case Opt_nosetuids:
-                       vol->setuids = 0;
-                       break;
-               case Opt_setuidfromacl:
-                       vol->setuidfromacl = 1;
-                       break;
-               case Opt_dynperm:
-                       vol->dynperm = true;
-                       break;
-               case Opt_nodynperm:
-                       vol->dynperm = false;
-                       break;
-               case Opt_nohard:
-                       vol->retry = 0;
-                       break;
-               case Opt_nosoft:
-                       vol->retry = 1;
-                       break;
-               case Opt_nointr:
-                       vol->intr = 0;
-                       break;
-               case Opt_intr:
-                       vol->intr = 1;
-                       break;
-               case Opt_nostrictsync:
-                       vol->nostrictsync = 1;
-                       break;
-               case Opt_strictsync:
-                       vol->nostrictsync = 0;
-                       break;
-               case Opt_serverino:
-                       vol->server_ino = 1;
-                       break;
-               case Opt_noserverino:
-                       vol->server_ino = 0;
-                       break;
-               case Opt_rwpidforward:
-                       vol->rwpidforward = 1;
-                       break;
-               case Opt_modesid:
-                       vol->mode_ace = 1;
-                       break;
-               case Opt_cifsacl:
-                       vol->cifs_acl = 1;
-                       break;
-               case Opt_nocifsacl:
-                       vol->cifs_acl = 0;
-                       break;
-               case Opt_acl:
-                       vol->no_psx_acl = 0;
-                       break;
-               case Opt_noacl:
-                       vol->no_psx_acl = 1;
-                       break;
-               case Opt_locallease:
-                       vol->local_lease = 1;
-                       break;
-               case Opt_sign:
-                       vol->sign = true;
-                       break;
-               case Opt_ignore_signature:
-                       vol->sign = true;
-                       vol->ignore_signature = true;
-                       break;
-               case Opt_seal:
-                       /* we do not do the following in secFlags because seal
-                        * is a per tree connection (mount) not a per socket
-                        * or per-smb connection option in the protocol
-                        * vol->secFlg |= CIFSSEC_MUST_SEAL;
-                        */
-                       vol->seal = 1;
-                       break;
-               case Opt_noac:
-                       pr_warn("Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n");
-                       break;
-               case Opt_fsc:
-#ifndef CONFIG_CIFS_FSCACHE
-                       cifs_dbg(VFS, "FS-Cache support needs CONFIG_CIFS_FSCACHE kernel config option set\n");
-                       goto cifs_parse_mount_err;
-#endif
-                       vol->fsc = true;
-                       break;
-               case Opt_mfsymlinks:
-                       vol->mfsymlinks = true;
-                       break;
-               case Opt_multiuser:
-                       vol->multiuser = true;
-                       break;
-               case Opt_sloppy:
-                       sloppy = true;
-                       break;
-               case Opt_nosharesock:
-                       vol->nosharesock = true;
-                       break;
-               case Opt_nopersistent:
-                       vol->nopersistent = true;
-                       if (vol->persistent) {
-                               cifs_dbg(VFS,
-                                 "persistenthandles mount options conflict\n");
-                               goto cifs_parse_mount_err;
-                       }
-                       break;
-               case Opt_persistent:
-                       vol->persistent = true;
-                       if ((vol->nopersistent) || (vol->resilient)) {
-                               cifs_dbg(VFS,
-                                 "persistenthandles mount options conflict\n");
-                               goto cifs_parse_mount_err;
-                       }
-                       break;
-               case Opt_resilient:
-                       vol->resilient = true;
-                       if (vol->persistent) {
-                               cifs_dbg(VFS,
-                                 "persistenthandles mount options conflict\n");
-                               goto cifs_parse_mount_err;
-                       }
-                       break;
-               case Opt_noresilient:
-                       vol->resilient = false; /* already the default */
-                       break;
-               case Opt_domainauto:
-                       vol->domainauto = true;
-                       break;
-               case Opt_rdma:
-                       vol->rdma = true;
-                       break;
-               case Opt_multichannel:
-                       vol->multichannel = true;
-                       /* if number of channels not specified, default to 2 */
-                       if (vol->max_channels < 2)
-                               vol->max_channels = 2;
-                       break;
-               case Opt_nomultichannel:
-                       vol->multichannel = false;
-                       vol->max_channels = 1;
-                       break;
-               case Opt_compress:
-                       vol->compression = UNKNOWN_TYPE;
-                       cifs_dbg(VFS,
-                               "SMB3 compression support is experimental\n");
-                       break;
-
-               /* Numeric Values */
-               case Opt_backupuid:
-                       if (get_option_uid(args, &vol->backupuid)) {
-                               cifs_dbg(VFS, "%s: Invalid backupuid value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->backupuid_specified = true;
-                       break;
-               case Opt_backupgid:
-                       if (get_option_gid(args, &vol->backupgid)) {
-                               cifs_dbg(VFS, "%s: Invalid backupgid value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->backupgid_specified = true;
-                       break;
-               case Opt_uid:
-                       if (get_option_uid(args, &vol->linux_uid)) {
-                               cifs_dbg(VFS, "%s: Invalid uid value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       uid_specified = true;
-                       break;
-               case Opt_cruid:
-                       if (get_option_uid(args, &vol->cred_uid)) {
-                               cifs_dbg(VFS, "%s: Invalid cruid value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       break;
-               case Opt_gid:
-                       if (get_option_gid(args, &vol->linux_gid)) {
-                               cifs_dbg(VFS, "%s: Invalid gid value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       gid_specified = true;
-                       break;
-               case Opt_file_mode:
-                       if (get_option_ul(args, &option)) {
-                               cifs_dbg(VFS, "%s: Invalid file_mode value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->file_mode = option;
-                       break;
-               case Opt_dirmode:
-                       if (get_option_ul(args, &option)) {
-                               cifs_dbg(VFS, "%s: Invalid dir_mode value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->dir_mode = option;
-                       break;
-               case Opt_port:
-                       if (get_option_ul(args, &option) ||
-                           option > USHRT_MAX) {
-                               cifs_dbg(VFS, "%s: Invalid port value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       port = (unsigned short)option;
-                       break;
-               case Opt_min_enc_offload:
-                       if (get_option_ul(args, &option)) {
-                               cifs_dbg(VFS, "Invalid minimum encrypted read offload size (esize)\n");
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->min_offload = option;
-                       break;
-               case Opt_blocksize:
-                       if (get_option_ul(args, &option)) {
-                               cifs_dbg(VFS, "%s: Invalid blocksize value\n",
-                                       __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       /*
-                        * The inode blocksize realistically should never need
-                        * to be less than 16K or greater than 16M; the default
-                        * is 1MB. Note that small inode block sizes (e.g. 64K)
-                        * can lead to very poor performance of common tools
-                        * like cp and scp.
-                        */
-                       if ((option < CIFS_MAX_MSGSIZE) ||
-                          (option > (4 * SMB3_DEFAULT_IOSIZE))) {
-                               cifs_dbg(VFS, "%s: Invalid blocksize\n",
-                                       __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->bsize = option;
-                       break;
-               case Opt_rsize:
-                       if (get_option_ul(args, &option)) {
-                               cifs_dbg(VFS, "%s: Invalid rsize value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->rsize = option;
-                       break;
-               case Opt_wsize:
-                       if (get_option_ul(args, &option)) {
-                               cifs_dbg(VFS, "%s: Invalid wsize value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->wsize = option;
-                       break;
-               case Opt_actimeo:
-                       if (get_option_ul(args, &option)) {
-                               cifs_dbg(VFS, "%s: Invalid actimeo value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
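-                       /* actimeo is given in seconds; store it in jiffies */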
-                       vol->actimeo = HZ * option;
-                       if (vol->actimeo > CIFS_MAX_ACTIMEO) {
-                               cifs_dbg(VFS, "attribute cache timeout too large\n");
-                               goto cifs_parse_mount_err;
-                       }
-                       break;
-               case Opt_handletimeout:
-                       if (get_option_ul(args, &option)) {
-                               cifs_dbg(VFS, "%s: Invalid handletimeout value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->handle_timeout = option;
-                       if (vol->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) {
-                               cifs_dbg(VFS, "Invalid handle cache timeout, longer than 16 minutes\n");
-                               goto cifs_parse_mount_err;
-                       }
-                       break;
-               case Opt_echo_interval:
-                       if (get_option_ul(args, &option)) {
-                               cifs_dbg(VFS, "%s: Invalid echo interval value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->echo_interval = option;
-                       break;
-               case Opt_snapshot:
-                       if (get_option_ul(args, &option)) {
-                               cifs_dbg(VFS, "%s: Invalid snapshot time\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->snapshot_time = option;
-                       break;
-               case Opt_max_credits:
-                       if (get_option_ul(args, &option) || (option < 20) ||
-                           (option > 60000)) {
-                               cifs_dbg(VFS, "%s: Invalid max_credits value\n",
-                                        __func__);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->max_credits = option;
-                       break;
-               case Opt_max_channels:
-                       if (get_option_ul(args, &option) || option < 1 ||
-                               option > CIFS_MAX_CHANNELS) {
-                               cifs_dbg(VFS, "%s: Invalid max_channels value, needs to be 1-%d\n",
-                                        __func__, CIFS_MAX_CHANNELS);
-                               goto cifs_parse_mount_err;
-                       }
-                       vol->max_channels = option;
-                       break;
-
-               /* String Arguments */
-
-               case Opt_blank_user:
-                       /* null user, i.e. anonymous authentication */
-                       vol->nullauth = 1;
-                       vol->username = NULL;
-                       break;
-               case Opt_user:
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
-                       if (strnlen(string, CIFS_MAX_USERNAME_LEN) >
-                                                       CIFS_MAX_USERNAME_LEN) {
-                               pr_warn("username too long\n");
-                               goto cifs_parse_mount_err;
-                       }
-
-                       kfree(vol->username);
-                       vol->username = kstrdup(string, GFP_KERNEL);
-                       if (!vol->username)
-                               goto cifs_parse_mount_err;
-                       break;
-               case Opt_blank_pass:
-                       /* passwords have to be handled differently
-                        * to allow the character used as the delimiter
-                        * to be passed within them
-                        */
-
-                       /*
-                        * Check if this is a case where the password
-                        * starts with a delimiter
-                        */
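-                       /* (e.g. with delim ',' the option "pass=,,secret"
-                        * encodes the literal password ",secret")
-                        */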
-                       tmp_end = strchr(data, '=');
-                       tmp_end++;
-                       if (!(tmp_end < end && tmp_end[1] == delim)) {
-                               /* No it is not. Set the password to NULL */
-                               kfree_sensitive(vol->password);
-                               vol->password = NULL;
-                               break;
-                       }
-                       fallthrough;    /* to Opt_pass below */
-               case Opt_pass:
-                       /* Obtain the value string */
-                       value = strchr(data, '=');
-                       value++;
-
-                       /* Set tmp_end to end of the string */
-                       tmp_end = (char *) value + strlen(value);
-
-                       /* Check if the following character is the delimiter.
-                        * If yes, we have encountered a doubled delimiter;
-                        * reset the NUL character to the delimiter
-                        */
-                       if (tmp_end < end && tmp_end[1] == delim) {
-                               tmp_end[0] = delim;
-
-                               /* Keep iterating until we get to a single
-                                * delimiter OR the end
-                                */
-                               while ((tmp_end = strchr(tmp_end, delim))
-                                       != NULL && (tmp_end[1] == delim)) {
-                                               tmp_end = (char *) &tmp_end[2];
-                               }
-
-                               /* Reset var options to point to next element */
-                               if (tmp_end) {
-                                       tmp_end[0] = '\0';
-                                       options = (char *) &tmp_end[1];
-                               } else
-                                       /* Reached the end of the mount option
-                                        * string */
-                                       options = end;
-                       }
-
-                       kfree_sensitive(vol->password);
-                       /* Now build new password string */
-                       temp_len = strlen(value);
-                       vol->password = kzalloc(temp_len+1, GFP_KERNEL);
-                       if (vol->password == NULL) {
-                               pr_warn("no memory for password\n");
-                               goto cifs_parse_mount_err;
-                       }
-
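-                       /* copy, collapsing each doubled delimiter to a
-                        * single literal one (e.g. "x,,y" becomes "x,y")
-                        */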
-                       for (i = 0, j = 0; i < temp_len; i++, j++) {
-                               vol->password[j] = value[i];
-                               if ((value[i] == delim) &&
-                                    value[i+1] == delim)
-                                       /* skip the second delimiter */
-                                       i++;
-                       }
-                       vol->password[j] = '\0';
-                       break;
-               case Opt_blank_ip:
-                       /* FIXME: should this be an error instead? */
-                       got_ip = false;
-                       break;
-               case Opt_ip:
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
-                       if (!cifs_convert_address(dstaddr, string,
-                                       strlen(string))) {
-                               pr_err("bad ip= option (%s)\n", string);
-                               goto cifs_parse_mount_err;
-                       }
-                       got_ip = true;
-                       break;
-               case Opt_domain:
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
-                       if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN)
-                                       == CIFS_MAX_DOMAINNAME_LEN) {
-                               pr_warn("domain name too long\n");
-                               goto cifs_parse_mount_err;
-                       }
-
-                       kfree(vol->domainname);
-                       vol->domainname = kstrdup(string, GFP_KERNEL);
-                       if (!vol->domainname) {
-                               pr_warn("no memory for domainname\n");
-                               goto cifs_parse_mount_err;
-                       }
-                       cifs_dbg(FYI, "Domain name set\n");
-                       break;
-               case Opt_srcaddr:
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
-                       if (!cifs_convert_address(
-                                       (struct sockaddr *)&vol->srcaddr,
-                                       string, strlen(string))) {
-                               pr_warn("Could not parse srcaddr: %s\n",
-                                       string);
-                               goto cifs_parse_mount_err;
-                       }
-                       break;
-               case Opt_iocharset:
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
-                       if (strnlen(string, 1024) >= 65) {
-                               pr_warn("iocharset name too long\n");
-                               goto cifs_parse_mount_err;
-                       }
-
-                       if (strncasecmp(string, "default", 7) != 0) {
-                               kfree(vol->iocharset);
-                               vol->iocharset = kstrdup(string,
-                                                        GFP_KERNEL);
-                               if (!vol->iocharset) {
-                                       pr_warn("no memory for charset\n");
-                                       goto cifs_parse_mount_err;
-                               }
-                       }
-                       /* if iocharset not set then load_nls_default
-                        * is used by caller
-                        */
-                        cifs_dbg(FYI, "iocharset set to %s\n", string);
-                       break;
-               case Opt_netbiosname:
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
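-                       /* RFC 1001 names are space padded (0x20) */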
-                       memset(vol->source_rfc1001_name, 0x20,
-                               RFC1001_NAME_LEN);
-                       /*
-                        * FIXME: are there cases in which a comma can
-                        * be valid in workstation netbios name (and
-                        * need special handling)?
-                        */
-                       for (i = 0; i < RFC1001_NAME_LEN; i++) {
-                               /* don't uppercase the netbios name for the user */
-                               if (string[i] == 0)
-                                       break;
-                               vol->source_rfc1001_name[i] = string[i];
-                       }
-                       /* The string still has its 16th byte zero,
-                        * set at the top of the function
-                        */
-                       if (i == RFC1001_NAME_LEN && string[i] != 0)
-                               pr_warn("netbiosname longer than 15 truncated\n");
-                       break;
-               case Opt_servern:
-                       /* server netbios name specified: override *SMBSERVER */
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
-                       /* last byte, type, is 0x20 for server type */
-                       memset(vol->target_rfc1001_name, 0x20,
-                               RFC1001_NAME_LEN_WITH_NULL);
-
-                       /* BB are there cases in which a comma can be
-                          valid in this workstation netbios name
-                          (and need special handling)? */
-
-                       /* user or mount helper must uppercase the
-                          netbios name */
-                       for (i = 0; i < 15; i++) {
-                               if (string[i] == 0)
-                                       break;
-                               vol->target_rfc1001_name[i] = string[i];
-                       }
-                       /* The string still has its 16th byte zero,
-                          set at the top of the function */
-                       if (i == RFC1001_NAME_LEN && string[i] != 0)
-                               pr_warn("server netbiosname longer than 15 truncated\n");
-                       break;
-               case Opt_ver:
-                       /* version of mount userspace tools, not dialect */
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
-                       /* If interface changes in mount.cifs bump to new ver */
-                       if (strncasecmp(string, "1", 1) == 0) {
-                               if (strlen(string) > 1) {
-                                       pr_warn("Bad mount helper ver=%s. Did you want SMB1 (CIFS) dialect and mean to type vers=1.0 instead?\n",
-                                               string);
-                                       goto cifs_parse_mount_err;
-                               }
-                               /* This is the default */
-                               break;
-                       }
-                       /* For all other values, error */
-                       pr_warn("Invalid mount helper version specified\n");
-                       goto cifs_parse_mount_err;
-               case Opt_vers:
-                       /* protocol version (dialect) */
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
-                       if (cifs_parse_smb_version(string, vol, is_smb3) != 0)
-                               goto cifs_parse_mount_err;
-                       got_version = true;
-                       break;
-               case Opt_sec:
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
-                       if (cifs_parse_security_flavors(string, vol) != 0)
-                               goto cifs_parse_mount_err;
-                       break;
-               case Opt_cache:
-                       string = match_strdup(args);
-                       if (string == NULL)
-                               goto out_nomem;
-
-                       if (cifs_parse_cache_flavor(string, vol) != 0)
-                               goto cifs_parse_mount_err;
-                       break;
-               default:
-                       /*
-                        * An option we don't recognize. Save it off for later
-                        * if we haven't already found one
-                        */
-                       if (!invalid)
-                               invalid = data;
-                       break;
-               }
-               /* Free up any allocated string */
-               kfree(string);
-               string = NULL;
-       }
-
-       if (!sloppy && invalid) {
-               pr_err("Unknown mount option \"%s\"\n", invalid);
-               goto cifs_parse_mount_err;
-       }
-
-       if (vol->rdma && vol->vals->protocol_id < SMB30_PROT_ID) {
-               cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n");
-               goto cifs_parse_mount_err;
-       }
-
-#ifndef CONFIG_KEYS
-       /* Multiuser mounts require CONFIG_KEYS support */
-       if (vol->multiuser) {
-               cifs_dbg(VFS, "Multiuser mounts require kernels with CONFIG_KEYS enabled\n");
-               goto cifs_parse_mount_err;
-       }
-#endif
-       if (!vol->UNC) {
-               cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n");
-               goto cifs_parse_mount_err;
-       }
-
-       /* make sure UNC has a share name */
-       if (!strchr(vol->UNC + 3, '\\')) {
-               cifs_dbg(VFS, "Malformed UNC. Unable to find share name.\n");
-               goto cifs_parse_mount_err;
-       }
-
-       if (!got_ip) {
-               int len;
-               const char *slash;
-
-               /* No ip= option specified? Try to get it from UNC */
-               /* Use the address part of the UNC. */
-               slash = strchr(&vol->UNC[2], '\\');
-               len = slash - &vol->UNC[2];
-               if (!cifs_convert_address(dstaddr, &vol->UNC[2], len)) {
-                       pr_err("Unable to determine destination address\n");
-                       goto cifs_parse_mount_err;
-               }
-       }
-
-       /* set the port that we got earlier */
-       cifs_set_port(dstaddr, port);
-
-       if (uid_specified)
-               vol->override_uid = override_uid;
-       else if (override_uid == 1)
-               pr_notice("ignoring forceuid mount option specified with no uid= option\n");
-
-       if (gid_specified)
-               vol->override_gid = override_gid;
-       else if (override_gid == 1)
-               pr_notice("ignoring forcegid mount option specified with no gid= option\n");
-
-       if (got_version == false)
-               pr_warn_once("No dialect specified on mount. Default has changed to a more secure dialect, SMB2.1 or later (e.g. SMB3.1.1), from CIFS (SMB1). To use the less secure SMB1 dialect to access old servers which do not support SMB3.1.1 (or even SMB3 or SMB2.1) specify vers=1.0 on mount.\n");
-
-       kfree(mountdata_copy);
-       return 0;
-
-out_nomem:
-       pr_warn("Could not allocate temporary buffer\n");
-cifs_parse_mount_err:
-       kfree(string);
-       kfree(mountdata_copy);
-       return 1;
-}
-
-/** Returns true if srcaddr isn't specified and rhs isn't
- * specified, or if srcaddr is specified and
- * matches the IP address of the rhs argument.
+/**
+ * Returns true if srcaddr isn't specified and rhs isn't specified, or
+ * if srcaddr is specified and matches the IP address of the rhs argument
  */
 bool
 cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs)
@@ -2377,14 +1153,14 @@ match_address(struct TCP_Server_Info *server, struct sockaddr *addr,
 }
 
 static bool
-match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
+match_security(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 {
        /*
-        * The select_sectype function should either return the vol->sectype
+        * The select_sectype function should either return the ctx->sectype
         * that was specified, or "Unspecified" if that sectype was not
         * compatible with the given NEGOTIATE request.
         */
-       if (server->ops->select_sectype(server, vol->sectype)
+       if (server->ops->select_sectype(server, ctx->sectype)
             == Unspecified)
                return false;
 
@@ -2393,60 +1169,60 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
         * global_secflags at this point since if MUST_SIGN is set then
         * the server->sign had better be too.
         */
-       if (vol->sign && !server->sign)
+       if (ctx->sign && !server->sign)
                return false;
 
        return true;
 }
 
-static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
+static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 {
-       struct sockaddr *addr = (struct sockaddr *)&vol->dstaddr;
+       struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr;
 
-       if (vol->nosharesock)
+       if (ctx->nosharesock)
                return 0;
 
        /* If multidialect negotiation see if existing sessions match one */
-       if (strcmp(vol->vals->version_string, SMB3ANY_VERSION_STRING) == 0) {
+       if (strcmp(ctx->vals->version_string, SMB3ANY_VERSION_STRING) == 0) {
                if (server->vals->protocol_id < SMB30_PROT_ID)
                        return 0;
-       } else if (strcmp(vol->vals->version_string,
+       } else if (strcmp(ctx->vals->version_string,
                   SMBDEFAULT_VERSION_STRING) == 0) {
                if (server->vals->protocol_id < SMB21_PROT_ID)
                        return 0;
-       } else if ((server->vals != vol->vals) || (server->ops != vol->ops))
+       } else if ((server->vals != ctx->vals) || (server->ops != ctx->ops))
                return 0;
 
        if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns))
                return 0;
 
        if (!match_address(server, addr,
-                          (struct sockaddr *)&vol->srcaddr))
+                          (struct sockaddr *)&ctx->srcaddr))
                return 0;
 
        if (!match_port(server, addr))
                return 0;
 
-       if (!match_security(server, vol))
+       if (!match_security(server, ctx))
                return 0;
 
-       if (server->echo_interval != vol->echo_interval * HZ)
+       if (server->echo_interval != ctx->echo_interval * HZ)
                return 0;
 
-       if (server->rdma != vol->rdma)
+       if (server->rdma != ctx->rdma)
                return 0;
 
-       if (server->ignore_signature != vol->ignore_signature)
+       if (server->ignore_signature != ctx->ignore_signature)
                return 0;
 
-       if (server->min_offload != vol->min_offload)
+       if (server->min_offload != ctx->min_offload)
                return 0;
 
        return 1;
 }
 
 struct TCP_Server_Info *
-cifs_find_tcp_session(struct smb_vol *vol)
+cifs_find_tcp_session(struct smb3_fs_context *ctx)
 {
        struct TCP_Server_Info *server;
 
@@ -2456,7 +1232,7 @@ cifs_find_tcp_session(struct smb_vol *vol)
                 * Skip ses channels since they're only handled in lower layers
                 * (e.g. cifs_send_recv).
                 */
-               if (server->is_channel || !match_server(server, vol))
+               if (server->is_channel || !match_server(server, ctx))
                        continue;
 
                ++server->srv_count;
@@ -2514,15 +1290,15 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
 }
 
 struct TCP_Server_Info *
-cifs_get_tcp_session(struct smb_vol *volume_info)
+cifs_get_tcp_session(struct smb3_fs_context *ctx)
 {
        struct TCP_Server_Info *tcp_ses = NULL;
        int rc;
 
-       cifs_dbg(FYI, "UNC: %s\n", volume_info->UNC);
+       cifs_dbg(FYI, "UNC: %s\n", ctx->UNC);
 
        /* see if we already have a matching tcp_ses */
-       tcp_ses = cifs_find_tcp_session(volume_info);
+       tcp_ses = cifs_find_tcp_session(ctx);
        if (tcp_ses)
                return tcp_ses;
 
@@ -2532,20 +1308,20 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
                goto out_err;
        }
 
-       tcp_ses->ops = volume_info->ops;
-       tcp_ses->vals = volume_info->vals;
+       tcp_ses->ops = ctx->ops;
+       tcp_ses->vals = ctx->vals;
        cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
-       tcp_ses->hostname = extract_hostname(volume_info->UNC);
+       tcp_ses->hostname = extract_hostname(ctx->UNC);
        if (IS_ERR(tcp_ses->hostname)) {
                rc = PTR_ERR(tcp_ses->hostname);
                goto out_err_crypto_release;
        }
 
-       tcp_ses->noblockcnt = volume_info->rootfs;
-       tcp_ses->noblocksnd = volume_info->noblocksnd || volume_info->rootfs;
-       tcp_ses->noautotune = volume_info->noautotune;
-       tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
-       tcp_ses->rdma = volume_info->rdma;
+       tcp_ses->noblockcnt = ctx->rootfs;
+       tcp_ses->noblocksnd = ctx->noblocksnd || ctx->rootfs;
+       tcp_ses->noautotune = ctx->noautotune;
+       tcp_ses->tcp_nodelay = ctx->sockopt_tcp_nodelay;
+       tcp_ses->rdma = ctx->rdma;
        tcp_ses->in_flight = 0;
        tcp_ses->max_in_flight = 0;
        tcp_ses->credits = 1;
@@ -2554,26 +1330,26 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
        mutex_init(&tcp_ses->srv_mutex);
        memcpy(tcp_ses->workstation_RFC1001_name,
-               volume_info->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
+               ctx->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
        memcpy(tcp_ses->server_RFC1001_name,
-               volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
+               ctx->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
        tcp_ses->session_estab = false;
        tcp_ses->sequence_number = 0;
        tcp_ses->reconnect_instance = 1;
        tcp_ses->lstrp = jiffies;
-       tcp_ses->compress_algorithm = cpu_to_le16(volume_info->compression);
+       tcp_ses->compress_algorithm = cpu_to_le16(ctx->compression);
        spin_lock_init(&tcp_ses->req_lock);
        INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
        INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
        INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
        INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
        mutex_init(&tcp_ses->reconnect_mutex);
-       memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr,
+       memcpy(&tcp_ses->srcaddr, &ctx->srcaddr,
               sizeof(tcp_ses->srcaddr));
-       memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
+       memcpy(&tcp_ses->dstaddr, &ctx->dstaddr,
                sizeof(tcp_ses->dstaddr));
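+       /* a secondary channel reuses the session's client GUID; otherwise
+        * a fresh one is generated
+        */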
-       if (volume_info->use_client_guid)
-               memcpy(tcp_ses->client_guid, volume_info->client_guid,
+       if (ctx->use_client_guid)
+               memcpy(tcp_ses->client_guid, ctx->client_guid,
                       SMB2_CLIENT_GUID_SIZE);
        else
                generate_random_uuid(tcp_ses->client_guid);
@@ -2585,9 +1361,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        tcp_ses->tcpStatus = CifsNew;
        ++tcp_ses->srv_count;
 
-       if (volume_info->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
-               volume_info->echo_interval <= SMB_ECHO_INTERVAL_MAX)
-               tcp_ses->echo_interval = volume_info->echo_interval * HZ;
+       if (ctx->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
+               ctx->echo_interval <= SMB_ECHO_INTERVAL_MAX)
+               tcp_ses->echo_interval = ctx->echo_interval * HZ;
        else
                tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ;
        if (tcp_ses->rdma) {
@@ -2597,7 +1373,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
                goto out_err_crypto_release;
 #endif
                tcp_ses->smbd_conn = smbd_get_connection(
-                       tcp_ses, (struct sockaddr *)&volume_info->dstaddr);
+                       tcp_ses, (struct sockaddr *)&ctx->dstaddr);
                if (tcp_ses->smbd_conn) {
                        cifs_dbg(VFS, "RDMA transport established\n");
                        rc = 0;
@@ -2626,11 +1402,11 @@ smbd_connected:
                module_put(THIS_MODULE);
                goto out_err_crypto_release;
        }
-       tcp_ses->min_offload = volume_info->min_offload;
+       tcp_ses->min_offload = ctx->min_offload;
        tcp_ses->tcpStatus = CifsNeedNegotiate;
 
        tcp_ses->nr_targets = 1;
-       tcp_ses->ignore_signature = volume_info->ignore_signature;
+       tcp_ses->ignore_signature = ctx->ignore_signature;
        /* thread spawned, put it on the list */
        spin_lock(&cifs_tcp_ses_lock);
        list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
@@ -2659,41 +1435,41 @@ out_err:
        return ERR_PTR(rc);
 }
 
-static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
+static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 {
-       if (vol->sectype != Unspecified &&
-           vol->sectype != ses->sectype)
+       if (ctx->sectype != Unspecified &&
+           ctx->sectype != ses->sectype)
                return 0;
 
        /*
         * If an existing session is limited to fewer channels than
         * requested, it should not be reused
         */
-       if (ses->chan_max < vol->max_channels)
+       if (ses->chan_max < ctx->max_channels)
                return 0;
 
        switch (ses->sectype) {
        case Kerberos:
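                /* Kerberos sessions are matched on the credential uid */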
-               if (!uid_eq(vol->cred_uid, ses->cred_uid))
+               if (!uid_eq(ctx->cred_uid, ses->cred_uid))
                        return 0;
                break;
        default:
                /* NULL username means anonymous session */
                if (ses->user_name == NULL) {
-                       if (!vol->nullauth)
+                       if (!ctx->nullauth)
                                return 0;
                        break;
                }
 
                /* anything else takes username/password */
                if (strncmp(ses->user_name,
-                           vol->username ? vol->username : "",
+                           ctx->username ? ctx->username : "",
                            CIFS_MAX_USERNAME_LEN))
                        return 0;
-               if ((vol->username && strlen(vol->username) != 0) &&
+               if ((ctx->username && strlen(ctx->username) != 0) &&
                    ses->password != NULL &&
                    strncmp(ses->password,
-                           vol->password ? vol->password : "",
+                           ctx->password ? ctx->password : "",
                            CIFS_MAX_PASSWORD_LEN))
                        return 0;
        }
@@ -2707,11 +1483,10 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
  * tcon_ipc. The IPC tcon has the same lifetime as the session.
  */
 static int
-cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
+cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 {
        int rc = 0, xid;
        struct cifs_tcon *tcon;
-       struct nls_table *nls_codepage;
        char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0};
        bool seal = false;
        struct TCP_Server_Info *server = ses->server;
@@ -2720,7 +1495,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
         * If the mount request that resulted in the creation of the
         * session requires encryption, force IPC to be encrypted too.
         */
-       if (volume_info->seal) {
+       if (ctx->seal) {
                if (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)
                        seal = true;
                else {
@@ -2736,14 +1511,11 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
 
        scnprintf(unc, sizeof(unc), "\\\\%s\\IPC$", server->hostname);
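        /* e.g. unc now holds "\\server\IPC$" */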
 
-       /* cannot fail */
-       nls_codepage = load_nls_default();
-
        xid = get_xid();
        tcon->ses = ses;
        tcon->ipc = true;
        tcon->seal = seal;
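+       /* the codepage for the tree connect now comes from the mount context */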
-       rc = server->ops->tree_connect(xid, ses, unc, tcon, nls_codepage);
+       rc = server->ops->tree_connect(xid, ses, unc, tcon, ctx->local_nls);
        free_xid(xid);
 
        if (rc) {
@@ -2756,7 +1528,6 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
 
        ses->tcon_ipc = tcon;
 out:
-       unload_nls(nls_codepage);
        return rc;
 }
 
@@ -2789,7 +1560,7 @@ cifs_free_ipc(struct cifs_ses *ses)
 }
 
 static struct cifs_ses *
-cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
+cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 {
        struct cifs_ses *ses;
 
@@ -2797,7 +1568,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
        list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
                if (ses->status == CifsExiting)
                        continue;
-               if (!match_session(ses, vol))
+               if (!match_session(ses, ctx))
                        continue;
                ++ses->ses_count;
                spin_unlock(&cifs_tcp_ses_lock);
@@ -2861,7 +1632,7 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
 
 /* Populate username and pw fields from keyring if possible */
 static int
-cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
+cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
 {
        int rc = 0;
        int is_domain = 0;
@@ -2941,32 +1712,32 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
                goto out_key_put;
        }
 
-       vol->username = kstrndup(payload, len, GFP_KERNEL);
-       if (!vol->username) {
+       ctx->username = kstrndup(payload, len, GFP_KERNEL);
+       if (!ctx->username) {
                cifs_dbg(FYI, "Unable to allocate %zd bytes for username\n",
                         len);
                rc = -ENOMEM;
                goto out_key_put;
        }
-       cifs_dbg(FYI, "%s: username=%s\n", __func__, vol->username);
+       cifs_dbg(FYI, "%s: username=%s\n", __func__, ctx->username);
 
        len = key->datalen - (len + 1);
        if (len > CIFS_MAX_PASSWORD_LEN || len <= 0) {
                cifs_dbg(FYI, "Bad len for password search (len=%zd)\n", len);
                rc = -EINVAL;
-               kfree(vol->username);
-               vol->username = NULL;
+               kfree(ctx->username);
+               ctx->username = NULL;
                goto out_key_put;
        }
 
        ++delim;
-       vol->password = kstrndup(delim, len, GFP_KERNEL);
-       if (!vol->password) {
+       ctx->password = kstrndup(delim, len, GFP_KERNEL);
+       if (!ctx->password) {
                cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n",
                         len);
                rc = -ENOMEM;
-               kfree(vol->username);
-               vol->username = NULL;
+               kfree(ctx->username);
+               ctx->username = NULL;
                goto out_key_put;
        }
 
@@ -2975,17 +1746,17 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
         * for the request.
         */
        if (is_domain && ses->domainName) {
-               vol->domainname = kstrndup(ses->domainName,
+               ctx->domainname = kstrndup(ses->domainName,
                                           strlen(ses->domainName),
                                           GFP_KERNEL);
-               if (!vol->domainname) {
+               if (!ctx->domainname) {
                        cifs_dbg(FYI, "Unable to allocate %zd bytes for domain\n",
                                 len);
                        rc = -ENOMEM;
-                       kfree(vol->username);
-                       vol->username = NULL;
-                       kfree_sensitive(vol->password);
-                       vol->password = NULL;
+                       kfree(ctx->username);
+                       ctx->username = NULL;
+                       kfree_sensitive(ctx->password);
+                       ctx->password = NULL;
                        goto out_key_put;
                }
        }
@@ -3000,7 +1771,7 @@ out_err:
 }
 #else /* ! CONFIG_KEYS */
 static inline int
-cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)),
+cifs_set_cifscreds(struct smb3_fs_context *ctx __attribute__((unused)),
                   struct cifs_ses *ses __attribute__((unused)))
 {
        return -ENOSYS;
@@ -3008,14 +1779,14 @@ cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)),
 #endif /* CONFIG_KEYS */
 
 /**
- * cifs_get_smb_ses - get a session matching @volume_info data from @server
+ * cifs_get_smb_ses - get a session matching @ctx data from @server
  *
  * This function assumes it is being called from cifs_mount() where we
  * already got a server reference (server refcount +1). See
  * cifs_get_tcon() for refcount explanations.
  */
 struct cifs_ses *
-cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
+cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 {
        int rc = -ENOMEM;
        unsigned int xid;
@@ -3025,7 +1796,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 
        xid = get_xid();
 
-       ses = cifs_find_smb_ses(server, volume_info);
+       ses = cifs_find_smb_ses(server, ctx);
        if (ses) {
                cifs_dbg(FYI, "Existing smb sess found (status=%d)\n",
                         ses->status);
@@ -3042,7 +1813,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
                if (ses->need_reconnect) {
                        cifs_dbg(FYI, "Session needs reconnect\n");
                        rc = cifs_setup_session(xid, ses,
-                                               volume_info->local_nls);
+                                               ctx->local_nls);
                        if (rc) {
                                mutex_unlock(&ses->session_mutex);
                                /* problem -- put our reference */
@@ -3071,40 +1842,40 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
        else
                sprintf(ses->serverName, "%pI4", &addr->sin_addr);
 
-       if (volume_info->username) {
-               ses->user_name = kstrdup(volume_info->username, GFP_KERNEL);
+       if (ctx->username) {
+               ses->user_name = kstrdup(ctx->username, GFP_KERNEL);
                if (!ses->user_name)
                        goto get_ses_fail;
        }
 
-       /* volume_info->password freed at unmount */
-       if (volume_info->password) {
-               ses->password = kstrdup(volume_info->password, GFP_KERNEL);
+       /* ctx->password freed at unmount */
+       if (ctx->password) {
+               ses->password = kstrdup(ctx->password, GFP_KERNEL);
                if (!ses->password)
                        goto get_ses_fail;
        }
-       if (volume_info->domainname) {
-               ses->domainName = kstrdup(volume_info->domainname, GFP_KERNEL);
+       if (ctx->domainname) {
+               ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL);
                if (!ses->domainName)
                        goto get_ses_fail;
        }
-       if (volume_info->domainauto)
-               ses->domainAuto = volume_info->domainauto;
-       ses->cred_uid = volume_info->cred_uid;
-       ses->linux_uid = volume_info->linux_uid;
+       if (ctx->domainauto)
+               ses->domainAuto = ctx->domainauto;
+       ses->cred_uid = ctx->cred_uid;
+       ses->linux_uid = ctx->linux_uid;
 
-       ses->sectype = volume_info->sectype;
-       ses->sign = volume_info->sign;
+       ses->sectype = ctx->sectype;
+       ses->sign = ctx->sign;
        mutex_lock(&ses->session_mutex);
 
        /* add server as first channel */
        ses->chans[0].server = server;
        ses->chan_count = 1;
-       ses->chan_max = volume_info->multichannel ? volume_info->max_channels:1;
+       ses->chan_max = ctx->multichannel ? ctx->max_channels : 1;
 
        rc = cifs_negotiate_protocol(xid, ses);
        if (!rc)
-               rc = cifs_setup_session(xid, ses, volume_info->local_nls);
+               rc = cifs_setup_session(xid, ses, ctx->local_nls);
 
        /* each channel uses a different signing key */
        memcpy(ses->chans[0].signkey, ses->smb3signingkey,
@@ -3121,7 +1892,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 
        free_xid(xid);
 
-       cifs_setup_ipc(ses, volume_info);
+       cifs_setup_ipc(ses, ctx);
 
        return ses;
 
@@ -3131,27 +1902,27 @@ get_ses_fail:
        return ERR_PTR(rc);
 }
 
-static int match_tcon(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
        if (tcon->tidStatus == CifsExiting)
                return 0;
-       if (strncmp(tcon->treeName, volume_info->UNC, MAX_TREE_SIZE))
+       if (strncmp(tcon->treeName, ctx->UNC, MAX_TREE_SIZE))
                return 0;
-       if (tcon->seal != volume_info->seal)
+       if (tcon->seal != ctx->seal)
                return 0;
-       if (tcon->snapshot_time != volume_info->snapshot_time)
+       if (tcon->snapshot_time != ctx->snapshot_time)
                return 0;
-       if (tcon->handle_timeout != volume_info->handle_timeout)
+       if (tcon->handle_timeout != ctx->handle_timeout)
                return 0;
-       if (tcon->no_lease != volume_info->no_lease)
+       if (tcon->no_lease != ctx->no_lease)
                return 0;
-       if (tcon->nodelete != volume_info->nodelete)
+       if (tcon->nodelete != ctx->nodelete)
                return 0;
        return 1;
 }
 
 static struct cifs_tcon *
-cifs_find_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
+cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 {
        struct list_head *tmp;
        struct cifs_tcon *tcon;
@@ -3163,7 +1934,7 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
                if (tcon->dfs_path)
                        continue;
 #endif
-               if (!match_tcon(tcon, volume_info))
+               if (!match_tcon(tcon, ctx))
                        continue;
                ++tcon->tc_count;
                spin_unlock(&cifs_tcp_ses_lock);
@@ -3194,6 +1965,18 @@ cifs_put_tcon(struct cifs_tcon *tcon)
                return;
        }
 
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       if (tcon->use_witness) {
+               int rc;
+
+               rc = cifs_swn_unregister(tcon);
+               if (rc < 0) {
+                       cifs_dbg(VFS, "%s: Failed to unregister for witness notifications: %d\n",
+                                       __func__, rc);
+               }
+       }
+#endif
+
        list_del_init(&tcon->tcon_list);
        spin_unlock(&cifs_tcp_ses_lock);
 
@@ -3208,7 +1991,7 @@ cifs_put_tcon(struct cifs_tcon *tcon)
 }
 
 /**
- * cifs_get_tcon - get a tcon matching @volume_info data from @ses
+ * cifs_get_tcon - get a tcon matching @ctx data from @ses
  *
  * - tcon refcount is the number of mount points using the tcon.
  * - ses refcount is the number of tcon using the session.
@@ -3228,12 +2011,12 @@ cifs_put_tcon(struct cifs_tcon *tcon)
  *    decrement the ses refcount.
  */
 static struct cifs_tcon *
-cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
+cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 {
        int rc, xid;
        struct cifs_tcon *tcon;
 
-       tcon = cifs_find_tcon(ses, volume_info);
+       tcon = cifs_find_tcon(ses, ctx);
        if (tcon) {
                /*
                 * tcon has refcount already incremented but we need to
@@ -3255,36 +2038,36 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
                goto out_fail;
        }
 
-       if (volume_info->snapshot_time) {
+       if (ctx->snapshot_time) {
                if (ses->server->vals->protocol_id == 0) {
                        cifs_dbg(VFS,
                             "Use SMB2 or later for snapshot mount option\n");
                        rc = -EOPNOTSUPP;
                        goto out_fail;
                } else
-                       tcon->snapshot_time = volume_info->snapshot_time;
+                       tcon->snapshot_time = ctx->snapshot_time;
        }
 
-       if (volume_info->handle_timeout) {
+       if (ctx->handle_timeout) {
                if (ses->server->vals->protocol_id == 0) {
                        cifs_dbg(VFS,
                             "Use SMB2.1 or later for handle timeout option\n");
                        rc = -EOPNOTSUPP;
                        goto out_fail;
                } else
-                       tcon->handle_timeout = volume_info->handle_timeout;
+                       tcon->handle_timeout = ctx->handle_timeout;
        }
 
        tcon->ses = ses;
-       if (volume_info->password) {
-               tcon->password = kstrdup(volume_info->password, GFP_KERNEL);
+       if (ctx->password) {
+               tcon->password = kstrdup(ctx->password, GFP_KERNEL);
                if (!tcon->password) {
                        rc = -ENOMEM;
                        goto out_fail;
                }
        }
 
-       if (volume_info->seal) {
+       if (ctx->seal) {
                if (ses->server->vals->protocol_id == 0) {
                        cifs_dbg(VFS,
                                 "SMB3 or later required for encryption\n");
@@ -3300,7 +2083,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
                }
        }
 
-       if (volume_info->linux_ext) {
+       if (ctx->linux_ext) {
                if (ses->server->posix_ext_supported) {
                        tcon->posix_extensions = true;
                        pr_warn_once("SMB3.11 POSIX Extensions are experimental\n");
@@ -3316,8 +2099,8 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
         * SetFS as we do on SessSetup and reconnect?
         */
        xid = get_xid();
-       rc = ses->server->ops->tree_connect(xid, ses, volume_info->UNC, tcon,
-                                           volume_info->local_nls);
+       rc = ses->server->ops->tree_connect(xid, ses, ctx->UNC, tcon,
+                                           ctx->local_nls);
        free_xid(xid);
        cifs_dbg(FYI, "Tcon rc = %d\n", rc);
        if (rc)
@@ -3325,7 +2108,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 
        tcon->use_persistent = false;
        /* check if SMB2 or later, CIFS does not support persistent handles */
-       if (volume_info->persistent) {
+       if (ctx->persistent) {
                if (ses->server->vals->protocol_id == 0) {
                        cifs_dbg(VFS,
                             "SMB3 or later required for persistent handles\n");
@@ -3342,10 +2125,10 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
                }
        } else if ((tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY)
             && (ses->server->capabilities & SMB2_GLOBAL_CAP_PERSISTENT_HANDLES)
-            && (volume_info->nopersistent == false)) {
+            && (ctx->nopersistent == false)) {
                cifs_dbg(FYI, "enabling persistent handles\n");
                tcon->use_persistent = true;
-       } else if (volume_info->resilient) {
+       } else if (ctx->resilient) {
                if (ses->server->vals->protocol_id == 0) {
                        cifs_dbg(VFS,
                             "SMB2.1 or later required for resilient handles\n");
@@ -3354,23 +2137,52 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
                }
                tcon->use_resilient = true;
        }
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       tcon->use_witness = false;
+       if (ctx->witness) {
+               if (ses->server->vals->protocol_id >= SMB30_PROT_ID) {
+                       if (tcon->capabilities & SMB2_SHARE_CAP_CLUSTER) {
+                               /*
+                                * Set witness in use flag in first place
+                                * to retry registration in the echo task
+                                */
+                               tcon->use_witness = true;
+                               /* And try to register immediately */
+                               rc = cifs_swn_register(tcon);
+                               if (rc < 0) {
+                                       cifs_dbg(VFS, "Failed to register for witness notifications: %d\n", rc);
+                                       goto out_fail;
+                               }
+                       } else {
+                               /* TODO: try to extend for non-cluster uses (eg multichannel) */
+                               cifs_dbg(VFS, "witness requested on mount but no CLUSTER capability on share\n");
+                               rc = -EOPNOTSUPP;
+                               goto out_fail;
+                       }
+               } else {
+                       cifs_dbg(VFS, "SMB3 or later required for witness option\n");
+                       rc = -EOPNOTSUPP;
+                       goto out_fail;
+               }
+       }
+#endif
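
The witness gating above reduces to a small predicate: the dialect must be SMB 3.0
or later and the share must report the CLUSTER capability. A minimal sketch of that
rule, assuming only the SMB30_PROT_ID and SMB2_SHARE_CAP_CLUSTER constants from the
CIFS headers (witness_usable is a hypothetical name, not a kernel function):

	static bool witness_usable(unsigned int protocol_id, __u32 share_caps)
	{
		return protocol_id >= SMB30_PROT_ID &&
		       (share_caps & SMB2_SHARE_CAP_CLUSTER);
	}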
 
        /* If the user really knows what they are doing they can override */
        if (tcon->share_flags & SMB2_SHAREFLAG_NO_CACHING) {
-               if (volume_info->cache_ro)
+               if (ctx->cache_ro)
                        cifs_dbg(VFS, "cache=ro requested on mount but NO_CACHING flag set on share\n");
-               else if (volume_info->cache_rw)
+               else if (ctx->cache_rw)
                        cifs_dbg(VFS, "cache=singleclient requested on mount but NO_CACHING flag set on share\n");
        }
 
-       if (volume_info->no_lease) {
+       if (ctx->no_lease) {
                if (ses->server->vals->protocol_id == 0) {
                        cifs_dbg(VFS,
                                "SMB2 or later required for nolease option\n");
                        rc = -EOPNOTSUPP;
                        goto out_fail;
                } else
-                       tcon->no_lease = volume_info->no_lease;
+                       tcon->no_lease = ctx->no_lease;
        }
 
        /*
@@ -3378,14 +2190,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
         * resources mounted more than once to the same server share the last
         * value passed in for the retry flag is used.
         */
-       tcon->retry = volume_info->retry;
-       tcon->nocase = volume_info->nocase;
+       tcon->retry = ctx->retry;
+       tcon->nocase = ctx->nocase;
        if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING)
-               tcon->nohandlecache = volume_info->nohandlecache;
+               tcon->nohandlecache = ctx->nohandlecache;
        else
                tcon->nohandlecache = 1;
-       tcon->nodelete = volume_info->nodelete;
-       tcon->local_lease = volume_info->local_lease;
+       tcon->nodelete = ctx->nodelete;
+       tcon->local_lease = ctx->local_lease;
        INIT_LIST_HEAD(&tcon->pending_opens);
 
        spin_lock(&cifs_tcp_ses_lock);
@@ -3440,23 +2252,24 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
         * We want to share sb only if we don't specify an r/wsize or
         * specified r/wsize is greater than or equal to existing one.
         */
-       if (new->wsize && new->wsize < old->wsize)
+       if (new->ctx->wsize && new->ctx->wsize < old->ctx->wsize)
                return 0;
 
-       if (new->rsize && new->rsize < old->rsize)
+       if (new->ctx->rsize && new->ctx->rsize < old->ctx->rsize)
                return 0;
 
-       if (!uid_eq(old->mnt_uid, new->mnt_uid) || !gid_eq(old->mnt_gid, new->mnt_gid))
+       if (!uid_eq(old->ctx->linux_uid, new->ctx->linux_uid) ||
+           !gid_eq(old->ctx->linux_gid, new->ctx->linux_gid))
                return 0;
 
-       if (old->mnt_file_mode != new->mnt_file_mode ||
-           old->mnt_dir_mode != new->mnt_dir_mode)
+       if (old->ctx->file_mode != new->ctx->file_mode ||
+           old->ctx->dir_mode != new->ctx->dir_mode)
                return 0;
 
        if (strcmp(old->local_nls->charset, new->local_nls->charset))
                return 0;
 
-       if (old->actimeo != new->actimeo)
+       if (old->ctx->actimeo != new->ctx->actimeo)
                return 0;
 
        return 1;
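
The checks above encode the superblock-sharing rule for I/O sizes: an existing sb
may be reused only if the new mount does not request a smaller wsize/rsize, where 0
means the size was not specified. As a standalone predicate (hypothetical name):

	static bool io_size_compatible(unsigned int new_sz, unsigned int old_sz)
	{
		/* 0: the new mount did not specify a size, so anything matches */
		return new_sz == 0 || new_sz >= old_sz;
	}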
@@ -3484,7 +2297,7 @@ int
 cifs_match_super(struct super_block *sb, void *data)
 {
        struct cifs_mnt_data *mnt_data = (struct cifs_mnt_data *)data;
-       struct smb_vol *volume_info;
+       struct smb3_fs_context *ctx;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *tcp_srv;
        struct cifs_ses *ses;
@@ -3503,11 +2316,11 @@ cifs_match_super(struct super_block *sb, void *data)
        ses = tcon->ses;
        tcp_srv = ses->server;
 
-       volume_info = mnt_data->vol;
+       ctx = mnt_data->ctx;
 
-       if (!match_server(tcp_srv, volume_info) ||
-           !match_session(ses, volume_info) ||
-           !match_tcon(tcon, volume_info) ||
+       if (!match_server(tcp_srv, ctx) ||
+           !match_session(ses, ctx) ||
+           !match_tcon(tcon, ctx) ||
            !match_prepath(sb, mnt_data)) {
                rc = 0;
                goto out;
@@ -3792,9 +2605,10 @@ ip_connect(struct TCP_Server_Info *server)
 }
 
 void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
-                         struct cifs_sb_info *cifs_sb, struct smb_vol *vol_info)
+                         struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 {
-       /* if we are reconnecting then should we check to see if
+       /*
+        * If we are reconnecting, should we check whether any requested
         * capabilities changed locally, e.g. via remount? We cannot do
         * much about it here even if they have (even if we could detect
         * it by the following)
@@ -3802,15 +2616,16 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
         * or if we changed to make all sb for the same share use the
         * same sb as NFS does - then we would only have one backpointer to sb.
         * What if we wanted to mount the server share twice once with
-        * and once without posixacls or posix paths? */
+        * and once without posixacls or posix paths?
+        */
        __u64 saved_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
 
-       if (vol_info && vol_info->no_linux_ext) {
+       if (ctx && ctx->no_linux_ext) {
                tcon->fsUnixInfo.Capability = 0;
                tcon->unix_ext = 0; /* Unix Extensions disabled */
                cifs_dbg(FYI, "Linux protocol extensions disabled\n");
                return;
-       } else if (vol_info)
+       } else if (ctx)
                tcon->unix_ext = 1; /* Unix Extensions supported */
 
        if (tcon->unix_ext == 0) {
@@ -3821,11 +2636,15 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
        if (!CIFSSMBQFSUnixInfo(xid, tcon)) {
                __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
                cifs_dbg(FYI, "unix caps which server supports %lld\n", cap);
-               /* check for reconnect case in which we do not
-                  want to change the mount behavior if we can avoid it */
-               if (vol_info == NULL) {
-                       /* turn off POSIX ACL and PATHNAMES if not set
-                          originally at mount time */
+               /*
+                * check for reconnect case in which we do not
+                * want to change the mount behavior if we can avoid it
+                */
+               if (ctx == NULL) {
+                       /*
+                        * turn off POSIX ACL and PATHNAMES if not set
+                        * originally at mount time
+                        */
                        if ((saved_cap & CIFS_UNIX_POSIX_ACL_CAP) == 0)
                                cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
                        if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
@@ -3842,7 +2661,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
                        cifs_dbg(VFS, "per-share encryption not supported yet\n");
 
                cap &= CIFS_UNIX_CAP_MASK;
-               if (vol_info && vol_info->no_psx_acl)
+               if (ctx && ctx->no_psx_acl)
                        cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
                else if (CIFS_UNIX_POSIX_ACL_CAP & cap) {
                        cifs_dbg(FYI, "negotiated posix acl support\n");
@@ -3851,7 +2670,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
                                        CIFS_MOUNT_POSIXACL;
                }
 
-               if (vol_info && vol_info->posix_paths == 0)
+               if (ctx && ctx->posix_paths == 0)
                        cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
                else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
                        cifs_dbg(FYI, "negotiate posix pathnames\n");
@@ -3882,129 +2701,59 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
                        cifs_dbg(FYI, "mandatory transport encryption cap\n");
 #endif /* CIFS_DEBUG2 */
                if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
-                       if (vol_info == NULL) {
+                       if (ctx == NULL)
                                cifs_dbg(FYI, "resetting capabilities failed\n");
-                       } else
+                       else
                                cifs_dbg(VFS, "Negotiating Unix capabilities with the server failed. Consider mounting with the Unix Extensions disabled if problems are found by specifying the nounix mount option.\n");
 
                }
        }
 }
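
Note the asymmetry above: on reconnect (ctx == NULL) the code preserves mount-time
behaviour, so capabilities that were off in the saved word stay off even if the
server still advertises them. A condensed sketch of that rule, assuming the
CIFS_UNIX_* constants (helper name hypothetical):

	static __u64 keep_mount_time_caps(__u64 negotiated, __u64 saved)
	{
		if ((saved & CIFS_UNIX_POSIX_ACL_CAP) == 0)
			negotiated &= ~CIFS_UNIX_POSIX_ACL_CAP;
		if ((saved & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0)
			negotiated &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
		return negotiated & CIFS_UNIX_CAP_MASK;
	}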
 
-int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
-                       struct cifs_sb_info *cifs_sb)
+int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb)
 {
+       struct smb3_fs_context *ctx = cifs_sb->ctx;
+
        INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks);
 
        spin_lock_init(&cifs_sb->tlink_tree_lock);
        cifs_sb->tlink_tree = RB_ROOT;
 
-       cifs_sb->bsize = pvolume_info->bsize;
-       /*
-        * Temporarily set r/wsize for matching superblock. If we end up using
-        * new sb then client will later negotiate it downward if needed.
-        */
-       cifs_sb->rsize = pvolume_info->rsize;
-       cifs_sb->wsize = pvolume_info->wsize;
-
-       cifs_sb->mnt_uid = pvolume_info->linux_uid;
-       cifs_sb->mnt_gid = pvolume_info->linux_gid;
-       cifs_sb->mnt_file_mode = pvolume_info->file_mode;
-       cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
        cifs_dbg(FYI, "file mode: %04ho  dir mode: %04ho\n",
-                cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
-
-       cifs_sb->actimeo = pvolume_info->actimeo;
-       cifs_sb->local_nls = pvolume_info->local_nls;
-
-       if (pvolume_info->nodfs)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS;
-       if (pvolume_info->noperm)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
-       if (pvolume_info->setuids)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
-       if (pvolume_info->setuidfromacl)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL;
-       if (pvolume_info->server_ino)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
-       if (pvolume_info->remap)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SFM_CHR;
-       if (pvolume_info->sfu_remap)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR;
-       if (pvolume_info->no_xattr)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR;
-       if (pvolume_info->sfu_emul)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
-       if (pvolume_info->nobrl)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
-       if (pvolume_info->nohandlecache)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE;
-       if (pvolume_info->nostrictsync)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
-       if (pvolume_info->mand_lock)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL;
-       if (pvolume_info->rwpidforward)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD;
-       if (pvolume_info->mode_ace)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MODE_FROM_SID;
-       if (pvolume_info->cifs_acl)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
-       if (pvolume_info->backupuid_specified) {
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID;
-               cifs_sb->mnt_backupuid = pvolume_info->backupuid;
-       }
-       if (pvolume_info->backupgid_specified) {
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID;
-               cifs_sb->mnt_backupgid = pvolume_info->backupgid;
-       }
-       if (pvolume_info->override_uid)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID;
-       if (pvolume_info->override_gid)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
-       if (pvolume_info->dynperm)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
-       if (pvolume_info->fsc)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE;
-       if (pvolume_info->multiuser)
-               cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER |
-                                           CIFS_MOUNT_NO_PERM);
-       if (pvolume_info->strict_io)
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO;
-       if (pvolume_info->direct_io) {
-               cifs_dbg(FYI, "mounting share using direct i/o\n");
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
+                ctx->file_mode, ctx->dir_mode);
+
+       /* this is needed for ASCII cp to Unicode converts */
+       if (ctx->iocharset == NULL) {
+               /* load_nls_default cannot return null */
+               cifs_sb->local_nls = load_nls_default();
+       } else {
+               cifs_sb->local_nls = load_nls(ctx->iocharset);
+               if (cifs_sb->local_nls == NULL) {
+                       cifs_dbg(VFS, "CIFS mount error: iocharset %s not found\n",
+                                ctx->iocharset);
+                       return -ELIBACC;
+               }
        }
-       if (pvolume_info->cache_ro) {
+       ctx->local_nls = cifs_sb->local_nls;
+
+       smb3_update_mnt_flags(cifs_sb);
+
+       if (ctx->direct_io)
+               cifs_dbg(FYI, "mounting share using direct i/o\n");
+       if (ctx->cache_ro) {
                cifs_dbg(VFS, "mounting share with read only caching. Ensure that the share will not be modified while in use.\n");
                cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RO_CACHE;
-       } else if (pvolume_info->cache_rw) {
+       } else if (ctx->cache_rw) {
                cifs_dbg(VFS, "mounting share in single client RW caching mode. Ensure that no other systems will be accessing the share.\n");
                cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_RO_CACHE |
                                            CIFS_MOUNT_RW_CACHE);
        }
-       if (pvolume_info->mfsymlinks) {
-               if (pvolume_info->sfu_emul) {
-                       /*
-                        * Our SFU ("Services for Unix" emulation does not allow
-                        * creating symlinks but does allow reading existing SFU
-                        * symlinks (it does allow both creating and reading SFU
-                        * style mknod and FIFOs though). When "mfsymlinks" and
-                        * "sfu" are both enabled at the same time, it allows
-                        * reading both types of symlinks, but will only create
-                        * them with mfsymlinks format. This allows better
-                        * Apple compatibility (probably better for Samba too)
-                        * while still recognizing old Windows style symlinks.
-                        */
-                       cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n");
-               }
-               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS;
-       }
 
-       if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm))
+       if ((ctx->cifs_acl) && (ctx->dynperm))
                cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n");
 
-       if (pvolume_info->prepath) {
-               cifs_sb->prepath = kstrdup(pvolume_info->prepath, GFP_KERNEL);
+       if (ctx->prepath) {
+               cifs_sb->prepath = kstrdup(ctx->prepath, GFP_KERNEL);
                if (cifs_sb->prepath == NULL)
                        return -ENOMEM;
        }
@@ -4012,26 +2761,6 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
        return 0;
 }
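
The iocharset handling that moved into cifs_setup_cifs_sb() follows the usual NLS
pattern: no charset requested means the default table (which cannot fail), while a
requested charset that cannot be loaded fails the mount with -ELIBACC. A user-space
analogue of the control flow (charset_exists is a made-up stand-in for load_nls):

	static int pick_charset(const char *requested)
	{
		if (requested == NULL)
			return 0;		/* like load_nls_default() */
		if (!charset_exists(requested))	/* like load_nls() returning NULL */
			return -ELIBACC;	/* surfaced as a mount error */
		return 0;
	}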
 
-void
-cifs_cleanup_volume_info_contents(struct smb_vol *volume_info)
-{
-       kfree(volume_info->username);
-       kfree_sensitive(volume_info->password);
-       kfree(volume_info->UNC);
-       kfree(volume_info->domainname);
-       kfree(volume_info->iocharset);
-       kfree(volume_info->prepath);
-}
-
-void
-cifs_cleanup_volume_info(struct smb_vol *volume_info)
-{
-       if (!volume_info)
-               return;
-       cifs_cleanup_volume_info_contents(volume_info);
-       kfree(volume_info);
-}
-
 /* Release all succeed connections */
 static inline void mount_put_conns(struct cifs_sb_info *cifs_sb,
                                   unsigned int xid,
@@ -4051,7 +2780,7 @@ static inline void mount_put_conns(struct cifs_sb_info *cifs_sb,
 }
 
 /* Get connections for tcp, ses and tcon */
-static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
+static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
                           unsigned int *xid,
                           struct TCP_Server_Info **nserver,
                           struct cifs_ses **nses, struct cifs_tcon **ntcon)
@@ -4068,7 +2797,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
        *xid = get_xid();
 
        /* get a reference to a tcp session */
-       server = cifs_get_tcp_session(vol);
+       server = cifs_get_tcp_session(ctx);
        if (IS_ERR(server)) {
                rc = PTR_ERR(server);
                return rc;
@@ -4076,13 +2805,13 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 
        *nserver = server;
 
-       if ((vol->max_credits < 20) || (vol->max_credits > 60000))
+       if ((ctx->max_credits < 20) || (ctx->max_credits > 60000))
                server->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
        else
-               server->max_credits = vol->max_credits;
+               server->max_credits = ctx->max_credits;
 
        /* get a reference to a SMB session */
-       ses = cifs_get_smb_ses(server, vol);
+       ses = cifs_get_smb_ses(server, ctx);
        if (IS_ERR(ses)) {
                rc = PTR_ERR(ses);
                return rc;
@@ -4090,14 +2819,14 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 
        *nses = ses;
 
-       if ((vol->persistent == true) && (!(ses->server->capabilities &
+       if ((ctx->persistent == true) && (!(ses->server->capabilities &
                                            SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) {
                cifs_server_dbg(VFS, "persistent handles not supported by server\n");
                return -EOPNOTSUPP;
        }
 
        /* search for existing tcon to this server share */
-       tcon = cifs_get_tcon(ses, vol);
+       tcon = cifs_get_tcon(ses, ctx);
        if (IS_ERR(tcon)) {
                rc = PTR_ERR(tcon);
                return rc;
@@ -4115,7 +2844,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
                 * reset of caps checks mount to see if unix extensions disabled
                 * for just this mount.
                 */
-               reset_cifs_unix_caps(*xid, tcon, cifs_sb, vol);
+               reset_cifs_unix_caps(*xid, tcon, cifs_sb, ctx);
                if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
                    (le64_to_cpu(tcon->fsUnixInfo.Capability) &
                     CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP))
@@ -4137,8 +2866,17 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
                }
        }
 
-       cifs_sb->wsize = server->ops->negotiate_wsize(tcon, vol);
-       cifs_sb->rsize = server->ops->negotiate_rsize(tcon, vol);
+       /*
+        * Clamp the rsize/wsize mount arguments if they are too big for the server
+        * and set the rsize/wsize to the negotiated values if not passed in by
+        * the user on mount
+        */
+       if ((cifs_sb->ctx->wsize == 0) ||
+           (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx)))
+               cifs_sb->ctx->wsize = server->ops->negotiate_wsize(tcon, ctx);
+       if ((cifs_sb->ctx->rsize == 0) ||
+           (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx)))
+               cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx);
 
        return 0;
 }
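
The replacement wsize/rsize logic above turns a blind assignment into a clamp: a
user-supplied size is honoured only up to what the server negotiates, and 0 (unset)
takes the negotiated value. Written as a pure function (hypothetical name):

	static unsigned int clamp_io_size(unsigned int requested, unsigned int negotiated)
	{
		if (requested == 0 || requested > negotiated)
			return negotiated;
		return requested;
	}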
@@ -4175,13 +2913,13 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
  * exiting connection (tcon)
  */
 static char *
-build_unc_path_to_root(const struct smb_vol *vol,
+build_unc_path_to_root(const struct smb3_fs_context *ctx,
                       const struct cifs_sb_info *cifs_sb, bool useppath)
 {
        char *full_path, *pos;
-       unsigned int pplen = useppath && vol->prepath ?
-               strlen(vol->prepath) + 1 : 0;
-       unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1);
+       unsigned int pplen = useppath && ctx->prepath ?
+               strlen(ctx->prepath) + 1 : 0;
+       unsigned int unc_len = strnlen(ctx->UNC, MAX_TREE_SIZE + 1);
 
        if (unc_len > MAX_TREE_SIZE)
                return ERR_PTR(-EINVAL);
@@ -4190,12 +2928,12 @@ build_unc_path_to_root(const struct smb_vol *vol,
        if (full_path == NULL)
                return ERR_PTR(-ENOMEM);
 
-       memcpy(full_path, vol->UNC, unc_len);
+       memcpy(full_path, ctx->UNC, unc_len);
        pos = full_path + unc_len;
 
        if (pplen) {
                *pos = CIFS_DIR_SEP(cifs_sb);
-               memcpy(pos + 1, vol->prepath, pplen);
+               memcpy(pos + 1, ctx->prepath, pplen);
                pos += pplen;
        }
 
@@ -4208,8 +2946,7 @@ build_unc_path_to_root(const struct smb_vol *vol,
 /**
  * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb
  *
- *
- * If a referral is found, cifs_sb->mountdata will be (re-)allocated
+ * If a referral is found, cifs_sb->ctx->mount_options will be (re-)allocated
  * to a string containing updated options for the submount.  Otherwise it
  * will be left untouched.
  *
@@ -4218,7 +2955,7 @@ build_unc_path_to_root(const struct smb_vol *vol,
  */
 static int
 expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
-                   struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
+                   struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
                    char *ref_path)
 {
        int rc;
@@ -4228,31 +2965,26 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
                return -EREMOTE;
 
-       full_path = build_unc_path_to_root(volume_info, cifs_sb, true);
+       full_path = build_unc_path_to_root(ctx, cifs_sb, true);
        if (IS_ERR(full_path))
                return PTR_ERR(full_path);
 
        rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
                            ref_path, &referral, NULL);
        if (!rc) {
-               char *fake_devname = NULL;
-
-               mdata = cifs_compose_mount_options(cifs_sb->mountdata,
-                                                  full_path + 1, &referral,
-                                                  &fake_devname);
+               mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
+                                                  full_path + 1, &referral);
                free_dfs_info_param(&referral);
 
                if (IS_ERR(mdata)) {
                        rc = PTR_ERR(mdata);
                        mdata = NULL;
                } else {
-                       cifs_cleanup_volume_info_contents(volume_info);
-                       rc = cifs_setup_volume_info(volume_info, mdata,
-                                                   fake_devname, false);
+                       smb3_cleanup_fs_context_contents(ctx);
+                       rc = cifs_setup_volume_info(ctx);
                }
-               kfree(fake_devname);
-               kfree(cifs_sb->mountdata);
-               cifs_sb->mountdata = mdata;
+               kfree(cifs_sb->ctx->mount_options);
+               cifs_sb->ctx->mount_options = mdata;
        }
        kfree(full_path);
        return rc;
@@ -4270,7 +3002,7 @@ static inline int get_next_dfs_tgt(const char *path,
 }
 
 static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
-                          struct smb_vol *fake_vol, struct smb_vol *vol)
+                          struct smb3_fs_context *fake_ctx, struct smb3_fs_context *ctx)
 {
        const char *tgt = dfs_cache_get_tgt_name(tgt_it);
        int len = strlen(tgt) + 2;
@@ -4281,29 +3013,29 @@ static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
                return -ENOMEM;
        scnprintf(new_unc, len, "\\%s", tgt);
 
-       kfree(vol->UNC);
-       vol->UNC = new_unc;
+       kfree(ctx->UNC);
+       ctx->UNC = new_unc;
 
-       if (fake_vol->prepath) {
-               kfree(vol->prepath);
-               vol->prepath = fake_vol->prepath;
-               fake_vol->prepath = NULL;
+       if (fake_ctx->prepath) {
+               kfree(ctx->prepath);
+               ctx->prepath = fake_ctx->prepath;
+               fake_ctx->prepath = NULL;
        }
-       memcpy(&vol->dstaddr, &fake_vol->dstaddr, sizeof(vol->dstaddr));
+       memcpy(&ctx->dstaddr, &fake_ctx->dstaddr, sizeof(ctx->dstaddr));
 
        return 0;
 }
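
For the scnprintf() above, len = strlen(tgt) + 2 accounts for the leading backslash
plus the NUL terminator. A worked example with a hypothetical target name:

	const char *tgt = "srv2\\share";	/* from dfs_cache_get_tgt_name() */
	int len = strlen(tgt) + 2;		/* '\\' + target + NUL */
	scnprintf(new_unc, len, "\\%s", tgt);	/* new_unc: "\srv2\share" */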
 
 static int setup_dfs_tgt_conn(const char *path, const char *full_path,
                              const struct dfs_cache_tgt_iterator *tgt_it,
-                             struct cifs_sb_info *cifs_sb, struct smb_vol *vol, unsigned int *xid,
-                             struct TCP_Server_Info **server, struct cifs_ses **ses,
-                             struct cifs_tcon **tcon)
+                             struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
+                             unsigned int *xid, struct TCP_Server_Info **server,
+                             struct cifs_ses **ses, struct cifs_tcon **tcon)
 {
        int rc;
        struct dfs_info3_param ref = {0};
-       char *mdata = NULL, *fake_devname = NULL;
-       struct smb_vol fake_vol = {NULL};
+       char *mdata = NULL;
+       struct smb3_fs_context fake_ctx = {NULL};
 
        cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path);
 
@@ -4311,45 +3043,43 @@ static int setup_dfs_tgt_conn(const char *path, const char *full_path,
        if (rc)
                return rc;
 
-       mdata = cifs_compose_mount_options(cifs_sb->mountdata, full_path + 1, &ref, &fake_devname);
+       mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
+                                          full_path + 1, &ref);
        free_dfs_info_param(&ref);
 
        if (IS_ERR(mdata)) {
                rc = PTR_ERR(mdata);
                mdata = NULL;
-       } else {
-               cifs_dbg(FYI, "%s: fake_devname: %s\n", __func__, fake_devname);
-               rc = cifs_setup_volume_info(&fake_vol, mdata, fake_devname,
-                                           false);
-       }
+       } else
+               rc = cifs_setup_volume_info(&fake_ctx);
+
        kfree(mdata);
-       kfree(fake_devname);
 
        if (!rc) {
                /*
-                * We use a 'fake_vol' here because we need pass it down to the
+                * We use a 'fake_ctx' here because we need to pass it down to the
                 * mount_{get,put} functions to test connection against new DFS
                 * targets.
                 */
                mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
-               rc = mount_get_conns(&fake_vol, cifs_sb, xid, server, ses,
+               rc = mount_get_conns(&fake_ctx, cifs_sb, xid, server, ses,
                                     tcon);
                if (!rc || (*server && *ses)) {
                        /*
                         * We were able to connect to new target server.
-                        * Update current volume info with new target server.
+                        * Update current context with new target server.
                         */
-                       rc = update_vol_info(tgt_it, &fake_vol, vol);
+                       rc = update_vol_info(tgt_it, &fake_ctx, ctx);
                }
        }
-       cifs_cleanup_volume_info_contents(&fake_vol);
+       smb3_cleanup_fs_context_contents(&fake_ctx);
        return rc;
 }
 
 static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb,
-                          struct smb_vol *vol, struct cifs_ses *root_ses, unsigned int *xid,
-                          struct TCP_Server_Info **server, struct cifs_ses **ses,
-                          struct cifs_tcon **tcon)
+                          struct smb3_fs_context *ctx, struct cifs_ses *root_ses,
+                          unsigned int *xid, struct TCP_Server_Info **server,
+                          struct cifs_ses **ses, struct cifs_tcon **tcon)
 {
        int rc;
        struct dfs_cache_tgt_list tgt_list;
@@ -4368,7 +3098,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
                if (rc)
                        break;
                /* Connect to next DFS target */
-               rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, vol, xid, server, ses,
+               rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, ctx, xid, server, ses,
                                        tcon);
                if (!rc || (*server && *ses))
                        break;
@@ -4388,22 +3118,21 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
 }
 #endif
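
do_dfs_failover() simply walks the cached target list, trying each referral target
until a connection is established. Schematically (iterator helpers hypothetical,
error handling elided):

	for (tgt_it = first_tgt(&tgt_list); tgt_it; tgt_it = next_tgt(&tgt_list, tgt_it)) {
		rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, ctx,
					xid, server, ses, tcon);
		if (!rc || (*server && *ses))
			break;	/* connected, or failed far enough along to stop */
	}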
 
+/*
+ * TODO: all callers of this are broken. We are not parsing mount_options
+ * here; should we pass a clone of the original context instead?
+ */
 int
-cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
-                       const char *devname, bool is_smb3)
+cifs_setup_volume_info(struct smb3_fs_context *ctx)
 {
        int rc = 0;
 
-       if (cifs_parse_mount_options(mount_data, devname, volume_info, is_smb3))
-               return -EINVAL;
-
-       if (volume_info->nullauth) {
+       if (ctx->nullauth) {
                cifs_dbg(FYI, "Anonymous login\n");
-               kfree(volume_info->username);
-               volume_info->username = NULL;
-       } else if (volume_info->username) {
+               kfree(ctx->username);
+               ctx->username = NULL;
+       } else if (ctx->username) {
                /* BB fixme parse for domain name here */
-               cifs_dbg(FYI, "Username: %s\n", volume_info->username);
+               cifs_dbg(FYI, "Username: %s\n", ctx->username);
        } else {
                cifs_dbg(VFS, "No username specified\n");
        /* In userspace mount helper we can get user name from alternate
           locations such as env variables and files on disk */
@@ -4411,41 +3140,9 @@ cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
                return -EINVAL;
        }
 
-       /* this is needed for ASCII cp to Unicode converts */
-       if (volume_info->iocharset == NULL) {
-               /* load_nls_default cannot return null */
-               volume_info->local_nls = load_nls_default();
-       } else {
-               volume_info->local_nls = load_nls(volume_info->iocharset);
-               if (volume_info->local_nls == NULL) {
-                       cifs_dbg(VFS, "CIFS mount error: iocharset %s not found\n",
-                                volume_info->iocharset);
-                       return -ELIBACC;
-               }
-       }
-
        return rc;
 }
 
-struct smb_vol *
-cifs_get_volume_info(char *mount_data, const char *devname, bool is_smb3)
-{
-       int rc;
-       struct smb_vol *volume_info;
-
-       volume_info = kmalloc(sizeof(struct smb_vol), GFP_KERNEL);
-       if (!volume_info)
-               return ERR_PTR(-ENOMEM);
-
-       rc = cifs_setup_volume_info(volume_info, mount_data, devname, is_smb3);
-       if (rc) {
-               cifs_cleanup_volume_info(volume_info);
-               volume_info = ERR_PTR(rc);
-       }
-
-       return volume_info;
-}
-
 static int
 cifs_are_all_path_components_accessible(struct TCP_Server_Info *server,
                                        unsigned int xid,
@@ -4497,7 +3194,7 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server,
  * Check if path is remote (e.g. a DFS share). Return -EREMOTE if it is,
  * otherwise 0.
  */
-static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
+static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
                          const unsigned int xid,
                          struct TCP_Server_Info *server,
                          struct cifs_tcon *tcon)
@@ -4511,7 +3208,7 @@ static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
        /*
         * cifs_build_path_to_root works only when we have a valid tcon
         */
-       full_path = cifs_build_path_to_root(vol, cifs_sb, tcon,
+       full_path = cifs_build_path_to_root(ctx, cifs_sb, tcon,
                                            tcon->Flags & SMB_SHARE_IS_IN_DFS);
        if (full_path == NULL)
                return -ENOMEM;
@@ -4560,7 +3257,7 @@ static void put_root_ses(struct cifs_ses *ses)
 }
 
 /* Check if a path component is remote and then update @dfs_path accordingly */
-static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
+static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
                             const unsigned int xid, struct TCP_Server_Info *server,
                             struct cifs_tcon *tcon, char **dfs_path)
 {
@@ -4571,7 +3268,7 @@ static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
        int added_treename = tcon->Flags & SMB_SHARE_IS_IN_DFS;
        int skip = added_treename;
 
-       path = cifs_build_path_to_root(vol, cifs_sb, tcon, added_treename);
+       path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename);
        if (!path)
                return -ENOMEM;
 
@@ -4602,17 +3299,17 @@ static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
                *s = 0;
                rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, path);
                if (rc && rc == -EREMOTE) {
-                       struct smb_vol v = {NULL};
+                       struct smb3_fs_context v = {NULL};
                        /* if @path contains a tree name, skip it in the prefix path */
                        if (added_treename) {
-                               rc = cifs_parse_devname(path, &v);
+                               rc = smb3_parse_devname(path, &v);
                                if (rc)
                                        break;
                                rc = -EREMOTE;
                                npath = build_unc_path_to_root(&v, cifs_sb, true);
-                               cifs_cleanup_volume_info_contents(&v);
+                               smb3_cleanup_fs_context_contents(&v);
                        } else {
-                               v.UNC = vol->UNC;
+                               v.UNC = ctx->UNC;
                                v.prepath = path + 1;
                                npath = build_unc_path_to_root(&v, cifs_sb, true);
                        }
@@ -4630,7 +3327,7 @@ static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
        return rc;
 }
 
-int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
+int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 {
        int rc = 0;
        unsigned int xid;
@@ -4642,7 +3339,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
        char *oldmnt = NULL;
        char *mntdata = NULL;
 
-       rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
+       rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
        /*
         * Unconditionally try to get an DFS referral (even cached) to determine whether it is an
         * DFS mount.
@@ -4650,26 +3347,27 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
         * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
         * to respond with PATH_NOT_COVERED to requests that include the prefix.
         */
-       if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), vol->UNC + 1, NULL,
+       if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
                           NULL)) {
                /* No DFS referral was returned.  Looks like a regular share. */
                if (rc)
                        goto error;
                /* Check if it is fully accessible and then mount it */
-               rc = is_path_remote(cifs_sb, vol, xid, server, tcon);
+               rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
                if (!rc)
                        goto out;
                if (rc != -EREMOTE)
                        goto error;
        }
        /* Save mount options */
-       mntdata = kstrndup(cifs_sb->mountdata, strlen(cifs_sb->mountdata), GFP_KERNEL);
+       mntdata = kstrndup(cifs_sb->ctx->mount_options,
+                          strlen(cifs_sb->ctx->mount_options), GFP_KERNEL);
        if (!mntdata) {
                rc = -ENOMEM;
                goto error;
        }
        /* Get path of DFS root */
-       ref_path = build_unc_path_to_root(vol, cifs_sb, false);
+       ref_path = build_unc_path_to_root(ctx, cifs_sb, false);
        if (IS_ERR(ref_path)) {
                rc = PTR_ERR(ref_path);
                ref_path = NULL;
@@ -4680,25 +3378,25 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
        do {
                /* Save full path of last DFS path we used to resolve final target server */
                kfree(full_path);
-               full_path = build_unc_path_to_root(vol, cifs_sb, !!count);
+               full_path = build_unc_path_to_root(ctx, cifs_sb, !!count);
                if (IS_ERR(full_path)) {
                        rc = PTR_ERR(full_path);
                        full_path = NULL;
                        break;
                }
                /* Chase referral */
-               oldmnt = cifs_sb->mountdata;
-               rc = expand_dfs_referral(xid, root_ses, vol, cifs_sb, ref_path + 1);
+               oldmnt = cifs_sb->ctx->mount_options;
+               rc = expand_dfs_referral(xid, root_ses, ctx, cifs_sb, ref_path + 1);
                if (rc)
                        break;
                /* Connect to new DFS target only if we were redirected */
-               if (oldmnt != cifs_sb->mountdata) {
+               if (oldmnt != cifs_sb->ctx->mount_options) {
                        mount_put_conns(cifs_sb, xid, server, ses, tcon);
-                       rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
+                       rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
                }
                if (rc && !server && !ses) {
                        /* Failed to connect. Try to connect to other targets in the referral. */
-                       rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, vol, root_ses, &xid,
+                       rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, ctx, root_ses, &xid,
                                             &server, &ses, &tcon);
                }
                if (rc == -EACCES || rc == -EOPNOTSUPP || !server || !ses)
@@ -4711,7 +3409,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
                        set_root_ses(cifs_sb, ses, &root_ses);
                }
                /* Check for remaining path components and then continue chasing them (-EREMOTE) */
-               rc = check_dfs_prepath(cifs_sb, vol, xid, server, tcon, &ref_path);
+               rc = check_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
                /* Prevent recursion on broken link referrals */
                if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS)
                        rc = -ELOOP;
@@ -4742,8 +3440,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
        tcon->remap = cifs_remap(cifs_sb);
        spin_unlock(&cifs_tcp_ses_lock);
 
-       /* Add original volume information for DFS cache to be used when refreshing referrals */
-       rc = dfs_cache_add_vol(mntdata, vol, cifs_sb->origin_fullpath);
+       /* Add original context for DFS cache to be used when refreshing referrals */
+       rc = dfs_cache_add_vol(mntdata, ctx, cifs_sb->origin_fullpath);
        if (rc)
                goto error;
        /*
@@ -4758,12 +3456,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
         */
        cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
        kfree(cifs_sb->prepath);
-       cifs_sb->prepath = vol->prepath;
-       vol->prepath = NULL;
+       cifs_sb->prepath = ctx->prepath;
+       ctx->prepath = NULL;
 
 out:
        free_xid(xid);
-       cifs_try_adding_channels(ses);
+       cifs_try_adding_channels(cifs_sb, ses);
        return mount_setup_tlink(cifs_sb, ses, tcon);
 
 error:
@@ -4776,7 +3474,7 @@ error:
        return rc;
 }
 #else
-int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
+int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 {
        int rc = 0;
        unsigned int xid;
@@ -4784,12 +3482,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
 
-       rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
+       rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
        if (rc)
                goto error;
 
        if (tcon) {
-               rc = is_path_remote(cifs_sb, vol, xid, server, tcon);
+               rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
                if (rc == -EREMOTE)
                        rc = -EOPNOTSUPP;
                if (rc)
@@ -4970,9 +3668,11 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
 
 static void delayed_free(struct rcu_head *p)
 {
-       struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu);
-       unload_nls(sbi->local_nls);
-       kfree(sbi);
+       struct cifs_sb_info *cifs_sb = container_of(p, struct cifs_sb_info, rcu);
+
+       unload_nls(cifs_sb->local_nls);
+       smb3_cleanup_fs_context(cifs_sb->ctx);
+       kfree(cifs_sb);
 }
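
delayed_free() is an RCU callback, so the cifs_sb (and now the fs_context attached
to it) is only released after a grace period; the call site in cifs_umount(), not
shown in this hunk, is of the form:

	call_rcu(&cifs_sb->rcu, delayed_free);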
 
 void
@@ -4997,7 +3697,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
        }
        spin_unlock(&cifs_sb->tlink_tree_lock);
 
-       kfree(cifs_sb->mountdata);
        kfree(cifs_sb->prepath);
 #ifdef CONFIG_CIFS_DFS_UPCALL
        dfs_cache_del_vol(cifs_sb->origin_fullpath);
@@ -5066,15 +3765,15 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 }
 
 static int
-cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses)
+cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses)
 {
-       vol->sectype = ses->sectype;
+       ctx->sectype = ses->sectype;
 
        /* krb5 is special, since we don't need username or pw */
-       if (vol->sectype == Kerberos)
+       if (ctx->sectype == Kerberos)
                return 0;
 
-       return cifs_set_cifscreds(vol, ses);
+       return cifs_set_cifscreds(ctx, ses);
 }
 
 static struct cifs_tcon *
@@ -5084,31 +3783,34 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
        struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
        struct cifs_ses *ses;
        struct cifs_tcon *tcon = NULL;
-       struct smb_vol *vol_info;
+       struct smb3_fs_context *ctx;
 
-       vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
-       if (vol_info == NULL)
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       if (ctx == NULL)
                return ERR_PTR(-ENOMEM);
 
-       vol_info->local_nls = cifs_sb->local_nls;
-       vol_info->linux_uid = fsuid;
-       vol_info->cred_uid = fsuid;
-       vol_info->UNC = master_tcon->treeName;
-       vol_info->retry = master_tcon->retry;
-       vol_info->nocase = master_tcon->nocase;
-       vol_info->nohandlecache = master_tcon->nohandlecache;
-       vol_info->local_lease = master_tcon->local_lease;
-       vol_info->no_lease = master_tcon->no_lease;
-       vol_info->resilient = master_tcon->use_resilient;
-       vol_info->persistent = master_tcon->use_persistent;
-       vol_info->handle_timeout = master_tcon->handle_timeout;
-       vol_info->no_linux_ext = !master_tcon->unix_ext;
-       vol_info->linux_ext = master_tcon->posix_extensions;
-       vol_info->sectype = master_tcon->ses->sectype;
-       vol_info->sign = master_tcon->ses->sign;
-       vol_info->seal = master_tcon->seal;
-
-       rc = cifs_set_vol_auth(vol_info, master_tcon->ses);
+       ctx->local_nls = cifs_sb->local_nls;
+       ctx->linux_uid = fsuid;
+       ctx->cred_uid = fsuid;
+       ctx->UNC = master_tcon->treeName;
+       ctx->retry = master_tcon->retry;
+       ctx->nocase = master_tcon->nocase;
+       ctx->nohandlecache = master_tcon->nohandlecache;
+       ctx->local_lease = master_tcon->local_lease;
+       ctx->no_lease = master_tcon->no_lease;
+       ctx->resilient = master_tcon->use_resilient;
+       ctx->persistent = master_tcon->use_persistent;
+       ctx->handle_timeout = master_tcon->handle_timeout;
+       ctx->no_linux_ext = !master_tcon->unix_ext;
+       ctx->linux_ext = master_tcon->posix_extensions;
+       ctx->sectype = master_tcon->ses->sectype;
+       ctx->sign = master_tcon->ses->sign;
+       ctx->seal = master_tcon->seal;
+#ifdef CONFIG_CIFS_SWN_UPCALL
+       ctx->witness = master_tcon->use_witness;
+#endif
+
+       rc = cifs_set_vol_auth(ctx, master_tcon->ses);
        if (rc) {
                tcon = ERR_PTR(rc);
                goto out;
@@ -5119,26 +3821,26 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
        ++master_tcon->ses->server->srv_count;
        spin_unlock(&cifs_tcp_ses_lock);
 
-       ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info);
+       ses = cifs_get_smb_ses(master_tcon->ses->server, ctx);
        if (IS_ERR(ses)) {
                tcon = (struct cifs_tcon *)ses;
                cifs_put_tcp_session(master_tcon->ses->server, 0);
                goto out;
        }
 
-       tcon = cifs_get_tcon(ses, vol_info);
+       tcon = cifs_get_tcon(ses, ctx);
        if (IS_ERR(tcon)) {
                cifs_put_smb_ses(ses);
                goto out;
        }
 
        if (cap_unix(ses))
-               reset_cifs_unix_caps(0, tcon, NULL, vol_info);
+               reset_cifs_unix_caps(0, tcon, NULL, ctx);
 
 out:
-       kfree(vol_info->username);
-       kfree_sensitive(vol_info->password);
-       kfree(vol_info);
+       kfree(ctx->username);
+       kfree_sensitive(ctx->password);
+       kfree(ctx);
 
        return tcon;
 }
index 6ee8496..6ad6ba5 100644 (file)
@@ -18,6 +18,7 @@
 #include "cifs_debug.h"
 #include "cifs_unicode.h"
 #include "smb2glob.h"
+#include "fs_context.h"
 
 #include "dfs_cache.h"
 
@@ -48,8 +49,8 @@ struct cache_entry {
 
 struct vol_info {
        char *fullpath;
-       spinlock_t smb_vol_lock;
-       struct smb_vol smb_vol;
+       spinlock_t ctx_lock;
+       struct smb3_fs_context ctx;
        char *mntdata;
        struct list_head list;
        struct list_head rlist;
@@ -586,7 +587,7 @@ static void __vol_release(struct vol_info *vi)
 {
        kfree(vi->fullpath);
        kfree(vi->mntdata);
-       cifs_cleanup_volume_info_contents(&vi->smb_vol);
+       smb3_cleanup_fs_context_contents(&vi->ctx);
        kfree(vi);
 }
 
@@ -1140,80 +1141,22 @@ out_unlock:
        return rc;
 }
 
-static int dup_vol(struct smb_vol *vol, struct smb_vol *new)
-{
-       memcpy(new, vol, sizeof(*new));
-
-       if (vol->username) {
-               new->username = kstrndup(vol->username, strlen(vol->username),
-                                        GFP_KERNEL);
-               if (!new->username)
-                       return -ENOMEM;
-       }
-       if (vol->password) {
-               new->password = kstrndup(vol->password, strlen(vol->password),
-                                        GFP_KERNEL);
-               if (!new->password)
-                       goto err_free_username;
-       }
-       if (vol->UNC) {
-               cifs_dbg(FYI, "%s: vol->UNC: %s\n", __func__, vol->UNC);
-               new->UNC = kstrndup(vol->UNC, strlen(vol->UNC), GFP_KERNEL);
-               if (!new->UNC)
-                       goto err_free_password;
-       }
-       if (vol->domainname) {
-               new->domainname = kstrndup(vol->domainname,
-                                          strlen(vol->domainname), GFP_KERNEL);
-               if (!new->domainname)
-                       goto err_free_unc;
-       }
-       if (vol->iocharset) {
-               new->iocharset = kstrndup(vol->iocharset,
-                                         strlen(vol->iocharset), GFP_KERNEL);
-               if (!new->iocharset)
-                       goto err_free_domainname;
-       }
-       if (vol->prepath) {
-               cifs_dbg(FYI, "%s: vol->prepath: %s\n", __func__, vol->prepath);
-               new->prepath = kstrndup(vol->prepath, strlen(vol->prepath),
-                                       GFP_KERNEL);
-               if (!new->prepath)
-                       goto err_free_iocharset;
-       }
-
-       return 0;
-
-err_free_iocharset:
-       kfree(new->iocharset);
-err_free_domainname:
-       kfree(new->domainname);
-err_free_unc:
-       kfree(new->UNC);
-err_free_password:
-       kfree_sensitive(new->password);
-err_free_username:
-       kfree(new->username);
-       kfree(new);
-       return -ENOMEM;
-}
-
 /**
- * dfs_cache_add_vol - add a cifs volume during mount() that will be handled by
+ * dfs_cache_add_vol - add a cifs context during mount() that will be handled by
  * DFS cache refresh worker.
  *
  * @mntdata: mount data.
- * @vol: cifs volume.
+ * @ctx: cifs context.
  * @fullpath: origin full path.
  *
- * Return zero if volume was set up correctly, otherwise non-zero.
+ * Return zero if context was set up correctly, otherwise non-zero.
  */
-int dfs_cache_add_vol(char *mntdata, struct smb_vol *vol, const char *fullpath)
+int dfs_cache_add_vol(char *mntdata, struct smb3_fs_context *ctx, const char *fullpath)
 {
        int rc;
        struct vol_info *vi;
 
-       if (!vol || !fullpath || !mntdata)
+       if (!ctx || !fullpath || !mntdata)
                return -EINVAL;
 
        cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
@@ -1228,12 +1171,12 @@ int dfs_cache_add_vol(char *mntdata, struct smb_vol *vol, const char *fullpath)
                goto err_free_vi;
        }
 
-       rc = dup_vol(vol, &vi->smb_vol);
+       rc = smb3_fs_context_dup(&vi->ctx, ctx);
        if (rc)
                goto err_free_fullpath;
 
        vi->mntdata = mntdata;
-       spin_lock_init(&vi->smb_vol_lock);
+       spin_lock_init(&vi->ctx_lock);
        kref_init(&vi->refcnt);
 
        spin_lock(&vol_list_lock);
@@ -1289,10 +1232,10 @@ int dfs_cache_update_vol(const char *fullpath, struct TCP_Server_Info *server)
        spin_unlock(&vol_list_lock);
 
        cifs_dbg(FYI, "%s: updating volume info\n", __func__);
-       spin_lock(&vi->smb_vol_lock);
-       memcpy(&vi->smb_vol.dstaddr, &server->dstaddr,
-              sizeof(vi->smb_vol.dstaddr));
-       spin_unlock(&vi->smb_vol_lock);
+       spin_lock(&vi->ctx_lock);
+       memcpy(&vi->ctx.dstaddr, &server->dstaddr,
+              sizeof(vi->ctx.dstaddr));
+       spin_unlock(&vi->ctx_lock);
 
        kref_put(&vi->refcnt, vol_release);
 
@@ -1445,11 +1388,11 @@ static inline void put_tcp_server(struct TCP_Server_Info *server)
        cifs_put_tcp_session(server, 0);
 }
 
-static struct TCP_Server_Info *get_tcp_server(struct smb_vol *vol)
+static struct TCP_Server_Info *get_tcp_server(struct smb3_fs_context *ctx)
 {
        struct TCP_Server_Info *server;
 
-       server = cifs_find_tcp_session(vol);
+       server = cifs_find_tcp_session(ctx);
        if (IS_ERR_OR_NULL(server))
                return NULL;
 
@@ -1473,10 +1416,10 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi,
        int rc;
        struct cache_entry *ce;
        struct dfs_info3_param ref = {0};
-       char *mdata = NULL, *devname = NULL;
+       char *mdata = NULL;
        struct TCP_Server_Info *server;
        struct cifs_ses *ses;
-       struct smb_vol vol = {NULL};
+       struct smb3_fs_context ctx = {NULL};
 
        rpath = get_dfs_root(path);
        if (IS_ERR(rpath))
@@ -1500,8 +1443,7 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi,
 
        up_read(&htable_rw_lock);
 
-       mdata = cifs_compose_mount_options(vi->mntdata, rpath, &ref,
-                                          &devname);
+       mdata = cifs_compose_mount_options(vi->mntdata, rpath, &ref);
        free_dfs_info_param(&ref);
 
        if (IS_ERR(mdata)) {
@@ -1510,24 +1452,23 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi,
                goto out;
        }
 
-       rc = cifs_setup_volume_info(&vol, mdata, devname, false);
-       kfree(devname);
+       rc = cifs_setup_volume_info(&ctx);
 
        if (rc) {
                ses = ERR_PTR(rc);
                goto out;
        }
 
-       server = get_tcp_server(&vol);
+       server = get_tcp_server(&ctx);
        if (!server) {
                ses = ERR_PTR(-EHOSTDOWN);
                goto out;
        }
 
-       ses = cifs_get_smb_ses(server, &vol);
+       ses = cifs_get_smb_ses(server, &ctx);
 
 out:
-       cifs_cleanup_volume_info_contents(&vol);
+       smb3_cleanup_fs_context_contents(&ctx);
        kfree(mdata);
        kfree(rpath);
 
@@ -1619,7 +1560,7 @@ static void refresh_cache_worker(struct work_struct *work)
         */
        spin_lock(&vol_list_lock);
        list_for_each_entry(vi, &vol_list, list) {
-               server = get_tcp_server(&vi->smb_vol);
+               server = get_tcp_server(&vi->ctx);
                if (!server)
                        continue;
 
@@ -1631,9 +1572,9 @@ static void refresh_cache_worker(struct work_struct *work)
 
        /* Walk through all TCONs and refresh any expired cache entry */
        list_for_each_entry_safe(vi, nvi, &vols, rlist) {
-               spin_lock(&vi->smb_vol_lock);
-               server = get_tcp_server(&vi->smb_vol);
-               spin_unlock(&vi->smb_vol_lock);
+               spin_lock(&vi->ctx_lock);
+               server = get_tcp_server(&vi->ctx);
+               spin_unlock(&vi->ctx_lock);
 
                if (!server)
                        goto next_vol;
index 3d7c051..1afc4f5 100644 (file)
@@ -44,7 +44,7 @@ dfs_cache_noreq_update_tgthint(const char *path,
 extern int dfs_cache_get_tgt_referral(const char *path,
                                      const struct dfs_cache_tgt_iterator *it,
                                      struct dfs_info3_param *ref);
-extern int dfs_cache_add_vol(char *mntdata, struct smb_vol *vol,
+extern int dfs_cache_add_vol(char *mntdata, struct smb3_fs_context *ctx,
                        const char *fullpath);
 extern int dfs_cache_update_vol(const char *fullpath,
                                struct TCP_Server_Info *server);
index 398c1ee..68900f1 100644 (file)
@@ -33,6 +33,7 @@
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
 #include "cifs_unicode.h"
+#include "fs_context.h"
 
 static void
 renew_parental_timestamps(struct dentry *direntry)
@@ -46,10 +47,10 @@ renew_parental_timestamps(struct dentry *direntry)
 }
 
 char *
-cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
+cifs_build_path_to_root(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
                        struct cifs_tcon *tcon, int add_treename)
 {
-       int pplen = vol->prepath ? strlen(vol->prepath) + 1 : 0;
+       int pplen = ctx->prepath ? strlen(ctx->prepath) + 1 : 0;
        int dfsplen;
        char *full_path = NULL;
 
@@ -71,7 +72,7 @@ cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
        if (dfsplen)
                memcpy(full_path, tcon->treeName, dfsplen);
        full_path[dfsplen] = CIFS_DIR_SEP(cifs_sb);
-       memcpy(full_path + dfsplen + 1, vol->prepath, pplen);
+       memcpy(full_path + dfsplen + 1, ctx->prepath, pplen);
        convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
        return full_path;
 }
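
A worked example for the assembly above (values hypothetical): with treeName
"\\srv\share", prepath "data/logs" and '\' as CIFS_DIR_SEP, the buffer is built as
"\\srv\share" + '\' + "data/logs", and convert_delimiter() then yields
"\\srv\share\data\logs".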
index be46fab..6d00190 100644 (file)
@@ -44,6 +44,7 @@
 #include "cifs_fs_sb.h"
 #include "fscache.h"
 #include "smbdirect.h"
+#include "fs_context.h"
 
 static inline int cifs_convert_flags(unsigned int flags)
 {
@@ -416,6 +417,8 @@ static void cifsFileInfo_put_work(struct work_struct *work)
  * cifsFileInfo_put - release a reference of file priv data
  *
  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
+ *
+ * @cifs_file: cifs/smb3 specific info (e.g. refcounts) for an open file
  */
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 {
@@ -431,8 +434,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
  *
 * If @wait_oplock_handler is true and we are releasing the last
  * reference, wait for any running oplock break handler of the file
- * and cancel any pending one. If calling this function from the
- * oplock break handler, you need to pass false.
+ * and cancel any pending one.
+ *
+ * @cifs_file: cifs/smb3 specific info (e.g. refcounts) for an open file
+ * @wait_oplock_handler: must be false if called from the oplock break handler
+ * @offload: not offloaded on close and oplock breaks
  *
  */
 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
@@ -566,7 +572,7 @@ int cifs_open(struct inode *inode, struct file *file)
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
-                               cifs_sb->mnt_file_mode /* ignored */,
+                               cifs_sb->ctx->file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
@@ -735,7 +741,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
                                                ~(O_CREAT | O_EXCL | O_TRUNC);
 
                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
-                                    cifs_sb->mnt_file_mode /* ignored */,
+                                    cifs_sb->ctx->file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
@@ -2330,7 +2336,7 @@ static int cifs_writepages(struct address_space *mapping,
         * If wsize is smaller than the page cache size, default to writing
         * one page at a time via cifs_writepage
         */
-       if (cifs_sb->wsize < PAGE_SIZE)
+       if (cifs_sb->ctx->wsize < PAGE_SIZE)
                return generic_writepages(mapping, wbc);
 
        xid = get_xid();
@@ -2363,7 +2369,7 @@ retry:
                if (rc)
                        get_file_rc = rc;
 
-               rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
+               rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
                                                   &wsize, credits);
                if (rc != 0) {
                        done = true;
@@ -2905,7 +2911,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
                                break;
                }
 
-               rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
+               rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
                                                   &wsize, credits);
                if (rc)
                        break;
@@ -3636,7 +3642,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
                                break;
                }
 
-               rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
+               rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
                                                   &rsize, credits);
                if (rc)
                        break;
@@ -4022,7 +4028,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
        cifs_sb = CIFS_FILE_SB(file);
 
        /* FIXME: set up handlers for larger reads and/or convert to async */
-       rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
+       rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
 
        if (file->private_data == NULL) {
                rc = -EBADF;
@@ -4407,7 +4413,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                                break;
                }
 
-               rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
+               rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
                                                   &rsize, credits);
                if (rc)
                        break;
index ad6c2fe..0afccbb 100644 (file)
@@ -6,8 +6,32 @@
  *              David Howells <dhowells@redhat.com>
  */
 
+#include <linux/ctype.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
+#include <linux/utsname.h>
+#include "cifsfs.h"
+#include "cifspdu.h"
 #include "cifsglob.h"
+#include "cifsproto.h"
+#include "cifs_unicode.h"
 #include "cifs_debug.h"
+#include "cifs_fs_sb.h"
+#include "ntlmssp.h"
+#include "nterr.h"
+#include "rfc1002pdu.h"
 #include "fs_context.h"
 
 static const match_table_t cifs_smb_version_tokens = {
@@ -24,77 +48,6 @@ static const match_table_t cifs_smb_version_tokens = {
        { Smb_version_err, NULL }
 };
 
-int
-cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3)
-{
-       substring_t args[MAX_OPT_ARGS];
-
-       switch (match_token(value, cifs_smb_version_tokens, args)) {
-#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
-       case Smb_1:
-               if (disable_legacy_dialects) {
-                       cifs_dbg(VFS, "mount with legacy dialect disabled\n");
-                       return 1;
-               }
-               if (is_smb3) {
-                       cifs_dbg(VFS, "vers=1.0 (cifs) not permitted when mounting with smb3\n");
-                       return 1;
-               }
-               cifs_dbg(VFS, "Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers\n");
-               vol->ops = &smb1_operations;
-               vol->vals = &smb1_values;
-               break;
-       case Smb_20:
-               if (disable_legacy_dialects) {
-                       cifs_dbg(VFS, "mount with legacy dialect disabled\n");
-                       return 1;
-               }
-               if (is_smb3) {
-                       cifs_dbg(VFS, "vers=2.0 not permitted when mounting with smb3\n");
-                       return 1;
-               }
-               vol->ops = &smb20_operations;
-               vol->vals = &smb20_values;
-               break;
-#else
-       case Smb_1:
-               cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n");
-               return 1;
-       case Smb_20:
-               cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n");
-               return 1;
-#endif /* CIFS_ALLOW_INSECURE_LEGACY */
-       case Smb_21:
-               vol->ops = &smb21_operations;
-               vol->vals = &smb21_values;
-               break;
-       case Smb_30:
-               vol->ops = &smb30_operations;
-               vol->vals = &smb30_values;
-               break;
-       case Smb_302:
-               vol->ops = &smb30_operations; /* currently identical with 3.0 */
-               vol->vals = &smb302_values;
-               break;
-       case Smb_311:
-               vol->ops = &smb311_operations;
-               vol->vals = &smb311_values;
-               break;
-       case Smb_3any:
-               vol->ops = &smb30_operations; /* currently identical with 3.0 */
-               vol->vals = &smb3any_values;
-               break;
-       case Smb_default:
-               vol->ops = &smb30_operations; /* currently identical with 3.0 */
-               vol->vals = &smbdefault_values;
-               break;
-       default:
-               cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
-               return 1;
-       }
-       return 0;
-}
-
 static const match_table_t cifs_secflavor_tokens = {
        { Opt_sec_krb5, "krb5" },
        { Opt_sec_krb5i, "krb5i" },
@@ -112,7 +65,123 @@ static const match_table_t cifs_secflavor_tokens = {
        { Opt_sec_err, NULL }
 };
 
-int cifs_parse_security_flavors(char *value, struct smb_vol *vol)
+const struct fs_parameter_spec smb3_fs_parameters[] = {
+       /* Mount options that take no arguments */
+       fsparam_flag_no("user_xattr", Opt_user_xattr),
+       fsparam_flag_no("forceuid", Opt_forceuid),
+       fsparam_flag_no("multichannel", Opt_multichannel),
+       fsparam_flag_no("forcegid", Opt_forcegid),
+       fsparam_flag("noblocksend", Opt_noblocksend),
+       fsparam_flag("noautotune", Opt_noautotune),
+       fsparam_flag("nolease", Opt_nolease),
+       fsparam_flag_no("hard", Opt_hard),
+       fsparam_flag_no("soft", Opt_soft),
+       fsparam_flag_no("perm", Opt_perm),
+       fsparam_flag("nodelete", Opt_nodelete),
+       fsparam_flag_no("mapposix", Opt_mapposix),
+       fsparam_flag("mapchars", Opt_mapchars),
+       fsparam_flag("nomapchars", Opt_nomapchars),
+       fsparam_flag_no("sfu", Opt_sfu),
+       fsparam_flag("nodfs", Opt_nodfs),
+       fsparam_flag_no("posixpaths", Opt_posixpaths),
+       fsparam_flag_no("unix", Opt_unix),
+       fsparam_flag_no("linux", Opt_unix),
+       fsparam_flag_no("posix", Opt_unix),
+       fsparam_flag("nocase", Opt_nocase),
+       fsparam_flag("ignorecase", Opt_nocase),
+       fsparam_flag_no("brl", Opt_brl),
+       fsparam_flag_no("handlecache", Opt_handlecache),
+       fsparam_flag("forcemandatorylock", Opt_forcemandatorylock),
+       fsparam_flag("forcemand", Opt_forcemandatorylock),
+       fsparam_flag("setuidfromacl", Opt_setuidfromacl),
+       fsparam_flag("idsfromsid", Opt_setuidfromacl),
+       fsparam_flag_no("setuids", Opt_setuids),
+       fsparam_flag_no("dynperm", Opt_dynperm),
+       fsparam_flag_no("intr", Opt_intr),
+       fsparam_flag_no("strictsync", Opt_strictsync),
+       fsparam_flag_no("serverino", Opt_serverino),
+       fsparam_flag("rwpidforward", Opt_rwpidforward),
+       fsparam_flag("cifsacl", Opt_cifsacl),
+       fsparam_flag_no("acl", Opt_acl),
+       fsparam_flag("locallease", Opt_locallease),
+       fsparam_flag("sign", Opt_sign),
+       fsparam_flag("ignore_signature", Opt_ignore_signature),
+       fsparam_flag("signloosely", Opt_ignore_signature),
+       fsparam_flag("seal", Opt_seal),
+       fsparam_flag("noac", Opt_noac),
+       fsparam_flag("fsc", Opt_fsc),
+       fsparam_flag("mfsymlinks", Opt_mfsymlinks),
+       fsparam_flag("multiuser", Opt_multiuser),
+       fsparam_flag("sloppy", Opt_sloppy),
+       fsparam_flag("nosharesock", Opt_nosharesock),
+       fsparam_flag_no("persistenthandles", Opt_persistent),
+       fsparam_flag_no("resilienthandles", Opt_resilient),
+       fsparam_flag("domainauto", Opt_domainauto),
+       fsparam_flag("rdma", Opt_rdma),
+       fsparam_flag("modesid", Opt_modesid),
+       fsparam_flag("modefromsid", Opt_modesid),
+       fsparam_flag("rootfs", Opt_rootfs),
+       fsparam_flag("compress", Opt_compress),
+       fsparam_flag("witness", Opt_witness),
+
+       /* Mount options that take a numeric value */
+       fsparam_u32("backupuid", Opt_backupuid),
+       fsparam_u32("backupgid", Opt_backupgid),
+       fsparam_u32("uid", Opt_uid),
+       fsparam_u32("cruid", Opt_cruid),
+       fsparam_u32("gid", Opt_gid),
+       fsparam_u32("file_mode", Opt_file_mode),
+       fsparam_u32("dirmode", Opt_dirmode),
+       fsparam_u32("dir_mode", Opt_dirmode),
+       fsparam_u32("port", Opt_port),
+       fsparam_u32("min_enc_offload", Opt_min_enc_offload),
+       fsparam_u32("esize", Opt_min_enc_offload),
+       fsparam_u32("bsize", Opt_blocksize),
+       fsparam_u32("rsize", Opt_rsize),
+       fsparam_u32("wsize", Opt_wsize),
+       fsparam_u32("actimeo", Opt_actimeo),
+       fsparam_u32("echo_interval", Opt_echo_interval),
+       fsparam_u32("max_credits", Opt_max_credits),
+       fsparam_u32("handletimeout", Opt_handletimeout),
+       fsparam_u32("snapshot", Opt_snapshot),
+       fsparam_u32("max_channels", Opt_max_channels),
+
+       /* Mount options that take a string value */
+       fsparam_string("source", Opt_source),
+       fsparam_string("unc", Opt_source),
+       fsparam_string("user", Opt_user),
+       fsparam_string("username", Opt_user),
+       fsparam_string("pass", Opt_pass),
+       fsparam_string("password", Opt_pass),
+       fsparam_string("ip", Opt_ip),
+       fsparam_string("addr", Opt_ip),
+       fsparam_string("domain", Opt_domain),
+       fsparam_string("dom", Opt_domain),
+       fsparam_string("srcaddr", Opt_srcaddr),
+       fsparam_string("iocharset", Opt_iocharset),
+       fsparam_string("netbiosname", Opt_netbiosname),
+       fsparam_string("servern", Opt_servern),
+       fsparam_string("ver", Opt_ver),
+       fsparam_string("vers", Opt_vers),
+       fsparam_string("sec", Opt_sec),
+       fsparam_string("cache", Opt_cache),
+
+       /* Arguments that should be ignored */
+       fsparam_flag("guest", Opt_ignore),
+       fsparam_flag("noatime", Opt_ignore),
+       fsparam_flag("relatime", Opt_ignore),
+       fsparam_flag("_netdev", Opt_ignore),
+       fsparam_flag_no("suid", Opt_ignore),
+       fsparam_flag_no("exec", Opt_ignore),
+       fsparam_flag_no("dev", Opt_ignore),
+       fsparam_flag_no("mand", Opt_ignore),
+       fsparam_string("cred", Opt_ignore),
+       fsparam_string("credentials", Opt_ignore),
+       {}
+};
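+
+/*
+ * Illustrative example (the server and share names are hypothetical):
+ * a mount such as
+ *
+ *   mount -t cifs //srv/share /mnt -o username=foo,vers=3.0,nosetuids
+ *
+ * is matched against this table: "username" hits the
+ * fsparam_string("username", Opt_user) entry, "vers" hits
+ * fsparam_string("vers", Opt_vers), and "nosetuids" hits the
+ * fsparam_flag_no("setuids", Opt_setuids) entry with result.negated
+ * set to true in smb3_fs_context_parse_param() below.
+ */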
+
+int
+cifs_parse_security_flavors(char *value, struct smb3_fs_context *ctx)
 {
 
        substring_t args[MAX_OPT_ARGS];
@@ -121,44 +190,44 @@ int cifs_parse_security_flavors(char *value, struct smb_vol *vol)
         * With mount options, the last one should win. Reset any existing
         * settings back to default.
         */
-       vol->sectype = Unspecified;
-       vol->sign = false;
+       ctx->sectype = Unspecified;
+       ctx->sign = false;
 
        switch (match_token(value, cifs_secflavor_tokens, args)) {
        case Opt_sec_krb5p:
                cifs_dbg(VFS, "sec=krb5p is not supported!\n");
                return 1;
        case Opt_sec_krb5i:
-               vol->sign = true;
+               ctx->sign = true;
                fallthrough;
        case Opt_sec_krb5:
-               vol->sectype = Kerberos;
+               ctx->sectype = Kerberos;
                break;
        case Opt_sec_ntlmsspi:
-               vol->sign = true;
+               ctx->sign = true;
                fallthrough;
        case Opt_sec_ntlmssp:
-               vol->sectype = RawNTLMSSP;
+               ctx->sectype = RawNTLMSSP;
                break;
        case Opt_sec_ntlmi:
-               vol->sign = true;
+               ctx->sign = true;
                fallthrough;
        case Opt_ntlm:
-               vol->sectype = NTLM;
+               ctx->sectype = NTLM;
                break;
        case Opt_sec_ntlmv2i:
-               vol->sign = true;
+               ctx->sign = true;
                fallthrough;
        case Opt_sec_ntlmv2:
-               vol->sectype = NTLMv2;
+               ctx->sectype = NTLMv2;
                break;
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
        case Opt_sec_lanman:
-               vol->sectype = LANMAN;
+               ctx->sectype = LANMAN;
                break;
 #endif
        case Opt_sec_none:
-               vol->nullauth = 1;
+               ctx->nullauth = 1;
                break;
        default:
                cifs_dbg(VFS, "bad security option: %s\n", value);
@@ -178,40 +247,40 @@ static const match_table_t cifs_cacheflavor_tokens = {
 };
 
 int
-cifs_parse_cache_flavor(char *value, struct smb_vol *vol)
+cifs_parse_cache_flavor(char *value, struct smb3_fs_context *ctx)
 {
        substring_t args[MAX_OPT_ARGS];
 
        switch (match_token(value, cifs_cacheflavor_tokens, args)) {
        case Opt_cache_loose:
-               vol->direct_io = false;
-               vol->strict_io = false;
-               vol->cache_ro = false;
-               vol->cache_rw = false;
+               ctx->direct_io = false;
+               ctx->strict_io = false;
+               ctx->cache_ro = false;
+               ctx->cache_rw = false;
                break;
        case Opt_cache_strict:
-               vol->direct_io = false;
-               vol->strict_io = true;
-               vol->cache_ro = false;
-               vol->cache_rw = false;
+               ctx->direct_io = false;
+               ctx->strict_io = true;
+               ctx->cache_ro = false;
+               ctx->cache_rw = false;
                break;
        case Opt_cache_none:
-               vol->direct_io = true;
-               vol->strict_io = false;
-               vol->cache_ro = false;
-               vol->cache_rw = false;
+               ctx->direct_io = true;
+               ctx->strict_io = false;
+               ctx->cache_ro = false;
+               ctx->cache_rw = false;
                break;
        case Opt_cache_ro:
-               vol->direct_io = false;
-               vol->strict_io = false;
-               vol->cache_ro = true;
-               vol->cache_rw = false;
+               ctx->direct_io = false;
+               ctx->strict_io = false;
+               ctx->cache_ro = true;
+               ctx->cache_rw = false;
                break;
        case Opt_cache_rw:
-               vol->direct_io = false;
-               vol->strict_io = false;
-               vol->cache_ro = false;
-               vol->cache_rw = true;
+               ctx->direct_io = false;
+               ctx->strict_io = false;
+               ctx->cache_ro = false;
+               ctx->cache_rw = true;
                break;
        default:
                cifs_dbg(VFS, "bad cache= option: %s\n", value);
@@ -219,3 +288,1253 @@ cifs_parse_cache_flavor(char *value, struct smb_vol *vol)
        }
        return 0;
 }
+
+#define DUP_CTX_STR(field)                                             \
+do {                                                                   \
+       if (ctx->field) {                                               \
+               new_ctx->field = kstrdup(ctx->field, GFP_ATOMIC);       \
+               if (new_ctx->field == NULL) {                           \
+                       smb3_cleanup_fs_context_contents(new_ctx);      \
+                       return -ENOMEM;                                 \
+               }                                                       \
+       }                                                               \
+} while (0)
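+
+/*
+ * For instance, DUP_CTX_STR(prepath) expands to roughly:
+ *
+ *   if (ctx->prepath) {
+ *           new_ctx->prepath = kstrdup(ctx->prepath, GFP_ATOMIC);
+ *           if (new_ctx->prepath == NULL) {
+ *                   smb3_cleanup_fs_context_contents(new_ctx);
+ *                   return -ENOMEM;
+ *           }
+ *   }
+ */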
+
+int
+smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx)
+{
+       int rc = 0;
+
+       memcpy(new_ctx, ctx, sizeof(*ctx));
+       new_ctx->prepath = NULL;
+       new_ctx->mount_options = NULL;
+       new_ctx->nodename = NULL;
+       new_ctx->username = NULL;
+       new_ctx->password = NULL;
+       new_ctx->domainname = NULL;
+       new_ctx->UNC = NULL;
+       new_ctx->iocharset = NULL;
+
+       /*
+        * Make sure to stay in sync with smb3_cleanup_fs_context_contents()
+        */
+       DUP_CTX_STR(prepath);
+       DUP_CTX_STR(mount_options);
+       DUP_CTX_STR(username);
+       DUP_CTX_STR(password);
+       DUP_CTX_STR(UNC);
+       DUP_CTX_STR(domainname);
+       DUP_CTX_STR(nodename);
+       DUP_CTX_STR(iocharset);
+
+       return rc;
+}
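+
+/*
+ * Note that only the string fields above are deep-copied; all other
+ * members (flags, sizes, addresses) are carried over by the memcpy()
+ * of the whole structure.
+ */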
+
+static int
+cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3)
+{
+       substring_t args[MAX_OPT_ARGS];
+
+       switch (match_token(value, cifs_smb_version_tokens, args)) {
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+       case Smb_1:
+               if (disable_legacy_dialects) {
+                       cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+                       return 1;
+               }
+               if (is_smb3) {
+                       cifs_dbg(VFS, "vers=1.0 (cifs) not permitted when mounting with smb3\n");
+                       return 1;
+               }
+               cifs_dbg(VFS, "Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers\n");
+               ctx->ops = &smb1_operations;
+               ctx->vals = &smb1_values;
+               break;
+       case Smb_20:
+               if (disable_legacy_dialects) {
+                       cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+                       return 1;
+               }
+               if (is_smb3) {
+                       cifs_dbg(VFS, "vers=2.0 not permitted when mounting with smb3\n");
+                       return 1;
+               }
+               ctx->ops = &smb20_operations;
+               ctx->vals = &smb20_values;
+               break;
+#else
+       case Smb_1:
+               cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n");
+               return 1;
+       case Smb_20:
+               cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n");
+               return 1;
+#endif /* CIFS_ALLOW_INSECURE_LEGACY */
+       case Smb_21:
+               ctx->ops = &smb21_operations;
+               ctx->vals = &smb21_values;
+               break;
+       case Smb_30:
+               ctx->ops = &smb30_operations;
+               ctx->vals = &smb30_values;
+               break;
+       case Smb_302:
+               ctx->ops = &smb30_operations; /* currently identical with 3.0 */
+               ctx->vals = &smb302_values;
+               break;
+       case Smb_311:
+               ctx->ops = &smb311_operations;
+               ctx->vals = &smb311_values;
+               break;
+       case Smb_3any:
+               ctx->ops = &smb30_operations; /* currently identical with 3.0 */
+               ctx->vals = &smb3any_values;
+               break;
+       case Smb_default:
+               ctx->ops = &smb30_operations; /* currently identical with 3.0 */
+               ctx->vals = &smbdefault_values;
+               break;
+       default:
+               cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
+               return 1;
+       }
+       return 0;
+}
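+
+/*
+ * Example: "vers=3.1.1" selects smb311_operations/smb311_values above,
+ * while omitting vers= altogether keeps the smbdefault_values chosen in
+ * smb3_init_fs_context(), which negotiates SMB2.1 or later.
+ */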
+
+/*
+ * Parse a devname into substrings and populate the ctx->UNC and ctx->prepath
+ * fields with the result. Returns 0 on success and a negative errno
+ * otherwise (e.g. -ENOMEM or -EINVAL).
+ */
+int
+smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx)
+{
+       char *pos;
+       const char *delims = "/\\";
+       size_t len;
+
+       if (unlikely(!devname || !*devname)) {
+               cifs_dbg(VFS, "Device name not specified\n");
+               return -EINVAL;
+       }
+
+       /* make sure we have a valid UNC double delimiter prefix */
+       len = strspn(devname, delims);
+       if (len != 2)
+               return -EINVAL;
+
+       /* find delimiter between host and sharename */
+       pos = strpbrk(devname + 2, delims);
+       if (!pos)
+               return -EINVAL;
+
+       /* skip past delimiter */
+       ++pos;
+
+       /* now go until next delimiter or end of string */
+       len = strcspn(pos, delims);
+
+       /* move "pos" up to delimiter or NULL */
+       pos += len;
+       ctx->UNC = kstrndup(devname, pos - devname, GFP_KERNEL);
+       if (!ctx->UNC)
+               return -ENOMEM;
+
+       convert_delimiter(ctx->UNC, '\\');
+
+       /* skip any delimiter */
+       if (*pos == '/' || *pos == '\\')
+               pos++;
+
+       /* If we are now at the end of the string, there is no prepath */
+       if (!*pos)
+               return 0;
+
+       ctx->prepath = kstrdup(pos, GFP_KERNEL);
+       if (!ctx->prepath)
+               return -ENOMEM;
+
+       return 0;
+}
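+
+/*
+ * Example (hypothetical names): devname "//srv/share/dir/sub" produces
+ * ctx->UNC = "\\srv\share" and ctx->prepath = "dir/sub", while
+ * "//srv/share" produces the same UNC and leaves ctx->prepath unset.
+ */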
+
+static void smb3_fs_context_free(struct fs_context *fc);
+static int smb3_fs_context_parse_param(struct fs_context *fc,
+                                      struct fs_parameter *param);
+static int smb3_fs_context_parse_monolithic(struct fs_context *fc,
+                                           void *data);
+static int smb3_get_tree(struct fs_context *fc);
+static int smb3_reconfigure(struct fs_context *fc);
+
+static const struct fs_context_operations smb3_fs_context_ops = {
+       .free                   = smb3_fs_context_free,
+       .parse_param            = smb3_fs_context_parse_param,
+       .parse_monolithic       = smb3_fs_context_parse_monolithic,
+       .get_tree               = smb3_get_tree,
+       .reconfigure            = smb3_reconfigure,
+};
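+
+/*
+ * These operations take effect once they are hooked up from an
+ * init_fs_context method; elsewhere in this series cifs_fs_type is
+ * wired up roughly as:
+ *
+ *   static struct file_system_type cifs_fs_type = {
+ *           .name            = "cifs",
+ *           .init_fs_context = smb3_init_fs_context,
+ *           .parameters      = smb3_fs_parameters,
+ *           ...
+ *   };
+ */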
+
+/*
+ * smb3_fs_context_parse_monolithic - Parse key[=val][,key[=val]]* mount data
+ * @fc: The filesystem context to fill in.
+ * @data: The data to parse
+ *
+ * Parse a monolithic block of data from sys_mount() that is in
+ * key[=val][,key[=val]]* form.  This is called from the
+ * ->parse_monolithic() fs_context operation.
+ *
+ * Returns 0 on success or a negative error (e.g. -ENOMEM, or the error
+ * returned by the ->parse_param() fs_context operation) on failure.
+ */
+static int smb3_fs_context_parse_monolithic(struct fs_context *fc,
+                                          void *data)
+{
+       struct smb3_fs_context *ctx = smb3_fc2context(fc);
+       char *options = data, *key;
+       int ret = 0;
+
+       if (!options)
+               return 0;
+
+       ctx->mount_options = kstrdup(data, GFP_KERNEL);
+       if (ctx->mount_options == NULL)
+               return -ENOMEM;
+
+       ret = security_sb_eat_lsm_opts(options, &fc->security);
+       if (ret)
+               return ret;
+
+       /* BB Need to add support for sep= here TBD */
+       while ((key = strsep(&options, ",")) != NULL) {
+               if (*key) {
+                       size_t v_len = 0;
+                       char *value = strchr(key, '=');
+
+                       if (value) {
+                               if (value == key)
+                                       continue;
+                               *value++ = 0;
+                               v_len = strlen(value);
+                       }
+                       ret = vfs_parse_fs_string(fc, key, value, v_len);
+                       if (ret < 0)
+                               break;
+               }
+       }
+
+       return ret;
+}
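+
+/*
+ * Example: data "username=foo,seal,password=" is split on ',' into
+ * "username=foo" (key "username", value "foo"), "seal" (a flag with no
+ * value), and "password=" (a key with an empty value, which gets the
+ * special handling in smb3_fs_context_parse_param() below).
+ */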
+
+/*
+ * Validate the preparsed information in the config.
+ */
+static int smb3_fs_context_validate(struct fs_context *fc)
+{
+       struct smb3_fs_context *ctx = smb3_fc2context(fc);
+
+       if (ctx->rdma && ctx->vals->protocol_id < SMB30_PROT_ID) {
+               cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n");
+               return -1;
+       }
+
+#ifndef CONFIG_KEYS
+       /* Multiuser mounts require CONFIG_KEYS support */
+       if (ctx->multiuser) {
+               cifs_dbg(VFS, "Multiuser mounts require kernels with CONFIG_KEYS enabled\n");
+               return -1;
+       }
+#endif
+
+       if (!ctx->got_version)
+               pr_warn_once("No dialect specified on mount. Default has changed to a more secure dialect, SMB2.1 or later (e.g. SMB3.1.1), from CIFS (SMB1). To use the less secure SMB1 dialect to access old servers which do not support SMB3.1.1 (or even SMB3 or SMB2.1) specify vers=1.0 on mount.\n");
+
+       if (!ctx->UNC) {
+               cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n");
+               return -1;
+       }
+
+       /* make sure UNC has a share name */
+       if (strlen(ctx->UNC) < 3 || !strchr(ctx->UNC + 3, '\\')) {
+               cifs_dbg(VFS, "Malformed UNC. Unable to find share name.\n");
+               return -1;
+       }
+
+       if (!ctx->got_ip) {
+               int len;
+               const char *slash;
+
+               /* No ip= option specified? Try to get it from UNC */
+               /* Use the address part of the UNC. */
+               slash = strchr(&ctx->UNC[2], '\\');
+               len = slash - &ctx->UNC[2];
+               if (!cifs_convert_address((struct sockaddr *)&ctx->dstaddr,
+                                         &ctx->UNC[2], len)) {
+                       pr_err("Unable to determine destination address\n");
+                       return -1;
+               }
+       }
+
+       /* set the port that we got earlier */
+       cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port);
+
+       if (ctx->override_uid && !ctx->uid_specified) {
+               ctx->override_uid = 0;
+               pr_notice("ignoring forceuid mount option specified with no uid= option\n");
+       }
+
+       if (ctx->override_gid && !ctx->gid_specified) {
+               ctx->override_gid = 0;
+               pr_notice("ignoring forcegid mount option specified with no gid= option\n");
+       }
+
+       return 0;
+}
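+
+/*
+ * Example: after smb3_parse_devname(), a ctx->UNC of
+ * "\\192.168.1.1\share" passes the share-name check above (a '\'
+ * exists past the host prefix), whereas a bare "\\192.168.1.1" fails
+ * with "Malformed UNC".
+ */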
+
+static int smb3_get_tree_common(struct fs_context *fc)
+{
+       struct smb3_fs_context *ctx = smb3_fc2context(fc);
+       struct dentry *root;
+       int rc = 0;
+
+       root = cifs_smb3_do_mount(fc->fs_type, 0, ctx);
+       if (IS_ERR(root))
+               return PTR_ERR(root);
+
+       fc->root = root;
+
+       return rc;
+}
+
+/*
+ * Create an SMB3 superblock from the parameters passed.
+ */
+static int smb3_get_tree(struct fs_context *fc)
+{
+       int err = smb3_fs_context_validate(fc);
+
+       if (err)
+               return err;
+       return smb3_get_tree_common(fc);
+}
+
+static void smb3_fs_context_free(struct fs_context *fc)
+{
+       struct smb3_fs_context *ctx = smb3_fc2context(fc);
+
+       smb3_cleanup_fs_context(ctx);
+}
+
+/*
+ * Compare the old and new proposed context during reconfigure
+ * and check if the changes are compatible.
+ */
+static int smb3_verify_reconfigure_ctx(struct smb3_fs_context *new_ctx,
+                                      struct smb3_fs_context *old_ctx)
+{
+       if (new_ctx->posix_paths != old_ctx->posix_paths) {
+               cifs_dbg(VFS, "can not change posixpaths during remount\n");
+               return -EINVAL;
+       }
+       if (new_ctx->sectype != old_ctx->sectype) {
+               cifs_dbg(VFS, "can not change sec during remount\n");
+               return -EINVAL;
+       }
+       if (new_ctx->multiuser != old_ctx->multiuser) {
+               cifs_dbg(VFS, "can not change multiuser during remount\n");
+               return -EINVAL;
+       }
+       if (new_ctx->UNC &&
+           (!old_ctx->UNC || strcmp(new_ctx->UNC, old_ctx->UNC))) {
+               cifs_dbg(VFS, "can not change UNC during remount\n");
+               return -EINVAL;
+       }
+       if (new_ctx->username &&
+           (!old_ctx->username || strcmp(new_ctx->username, old_ctx->username))) {
+               cifs_dbg(VFS, "can not change username during remount\n");
+               return -EINVAL;
+       }
+       if (new_ctx->password &&
+           (!old_ctx->password || strcmp(new_ctx->password, old_ctx->password))) {
+               cifs_dbg(VFS, "can not change password during remount\n");
+               return -EINVAL;
+       }
+       if (new_ctx->domainname &&
+           (!old_ctx->domainname || strcmp(new_ctx->domainname, old_ctx->domainname))) {
+               cifs_dbg(VFS, "can not change domainname during remount\n");
+               return -EINVAL;
+       }
+       if (new_ctx->nodename &&
+           (!old_ctx->nodename || strcmp(new_ctx->nodename, old_ctx->nodename))) {
+               cifs_dbg(VFS, "can not change nodename during remount\n");
+               return -EINVAL;
+       }
+       if (new_ctx->iocharset &&
+           (!old_ctx->iocharset || strcmp(new_ctx->iocharset, old_ctx->iocharset))) {
+               cifs_dbg(VFS, "can not change iocharset during remount\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+#define STEAL_STRING(cifs_sb, ctx, field)                              \
+do {                                                                   \
+       kfree(ctx->field);                                              \
+       ctx->field = cifs_sb->ctx->field;                               \
+       cifs_sb->ctx->field = NULL;                                     \
+} while (0)
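+
+/*
+ * For example, STEAL_STRING(cifs_sb, ctx, UNC) frees ctx->UNC and then
+ * moves ownership of cifs_sb->ctx->UNC into ctx->UNC, leaving
+ * cifs_sb->ctx->UNC NULL so the string cannot be freed twice.
+ */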
+
+static int smb3_reconfigure(struct fs_context *fc)
+{
+       struct smb3_fs_context *ctx = smb3_fc2context(fc);
+       struct dentry *root = fc->root;
+       struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
+       int rc;
+
+       rc = smb3_verify_reconfigure_ctx(ctx, cifs_sb->ctx);
+       if (rc)
+               return rc;
+
+       /*
+        * We can not change UNC/username/password/domainname/nodename/iocharset
+        * during reconnect so ignore what we have in the new context and
+        * just use what we already have in cifs_sb->ctx.
+        */
+       STEAL_STRING(cifs_sb, ctx, UNC);
+       STEAL_STRING(cifs_sb, ctx, username);
+       STEAL_STRING(cifs_sb, ctx, password);
+       STEAL_STRING(cifs_sb, ctx, domainname);
+       STEAL_STRING(cifs_sb, ctx, nodename);
+       STEAL_STRING(cifs_sb, ctx, iocharset);
+
+       /* if rsize or wsize not passed in on remount, use previous values */
+       if (ctx->rsize == 0)
+               ctx->rsize = cifs_sb->ctx->rsize;
+       if (ctx->wsize == 0)
+               ctx->wsize = cifs_sb->ctx->wsize;
+
+       smb3_cleanup_fs_context_contents(cifs_sb->ctx);
+       rc = smb3_fs_context_dup(cifs_sb->ctx, ctx);
+       smb3_update_mnt_flags(cifs_sb);
+
+       return rc;
+}
+
+static int smb3_fs_context_parse_param(struct fs_context *fc,
+                                     struct fs_parameter *param)
+{
+       struct fs_parse_result result;
+       struct smb3_fs_context *ctx = smb3_fc2context(fc);
+       int i, opt;
+       bool is_smb3 = !strcmp(fc->fs_type->name, "smb3");
+       bool skip_parsing = false;
+
+       cifs_dbg(FYI, "CIFS: parsing cifs mount option '%s'\n", param->key);
+
+       /*
+        * fs_parse() cannot handle string options with an empty value, so
+        * we need to handle them specially here.
+        */
+       if (param->type == fs_value_is_string && param->string[0] == 0) {
+               if (!strcmp("pass", param->key) || !strcmp("password", param->key)) {
+                       skip_parsing = true;
+                       opt = Opt_pass;
+               } else if (!strcmp("user", param->key) || !strcmp("username", param->key)) {
+                       skip_parsing = true;
+                       opt = Opt_user;
+               }
+       }
+
+       if (!skip_parsing) {
+               opt = fs_parse(fc, smb3_fs_parameters, param, &result);
+               if (opt < 0)
+                       return ctx->sloppy ? 1 : opt;
+       }
+
+       switch (opt) {
+       case Opt_compress:
+               ctx->compression = UNKNOWN_TYPE;
+               cifs_dbg(VFS,
+                       "SMB3 compression support is experimental\n");
+               break;
+       case Opt_nodfs:
+               ctx->nodfs = 1;
+               break;
+       case Opt_hard:
+               if (result.negated)
+                       ctx->retry = 0;
+               else
+                       ctx->retry = 1;
+               break;
+       case Opt_soft:
+               if (result.negated)
+                       ctx->retry = 1;
+               else
+                       ctx->retry = 0;
+               break;
+       case Opt_mapposix:
+               if (result.negated)
+                       ctx->remap = false;
+               else {
+                       ctx->remap = true;
+                       ctx->sfu_remap = false; /* disable SFU mapping */
+               }
+               break;
+       case Opt_user_xattr:
+               if (result.negated)
+                       ctx->no_xattr = 1;
+               else
+                       ctx->no_xattr = 0;
+               break;
+       case Opt_forceuid:
+               if (result.negated)
+                       ctx->override_uid = 0;
+               else
+                       ctx->override_uid = 1;
+               break;
+       case Opt_forcegid:
+               if (result.negated)
+                       ctx->override_gid = 0;
+               else
+                       ctx->override_gid = 1;
+               break;
+       case Opt_perm:
+               if (result.negated)
+                       ctx->noperm = 1;
+               else
+                       ctx->noperm = 0;
+               break;
+       case Opt_dynperm:
+               if (result.negated)
+                       ctx->dynperm = 0;
+               else
+                       ctx->dynperm = 1;
+               break;
+       case Opt_sfu:
+               if (result.negated)
+                       ctx->sfu_emul = 0;
+               else
+                       ctx->sfu_emul = 1;
+               break;
+       case Opt_noblocksend:
+               ctx->noblocksnd = 1;
+               break;
+       case Opt_noautotune:
+               ctx->noautotune = 1;
+               break;
+       case Opt_nolease:
+               ctx->no_lease = 1;
+               break;
+       case Opt_nodelete:
+               ctx->nodelete = 1;
+               break;
+       case Opt_multichannel:
+               if (result.negated) {
+                       ctx->multichannel = false;
+                       ctx->max_channels = 1;
+               } else {
+                       ctx->multichannel = true;
+                       /* if number of channels not specified, default to 2 */
+                       if (ctx->max_channels < 2)
+                               ctx->max_channels = 2;
+               }
+               break;
+       case Opt_uid:
+               ctx->linux_uid.val = result.uint_32;
+               ctx->uid_specified = true;
+               break;
+       case Opt_cruid:
+               ctx->cred_uid.val = result.uint_32;
+               break;
+       case Opt_backupgid:
+               ctx->backupgid.val = result.uint_32;
+               ctx->backupgid_specified = true;
+               break;
+       case Opt_gid:
+               ctx->linux_gid.val = result.uint_32;
+               ctx->gid_specified = true;
+               break;
+       case Opt_port:
+               ctx->port = result.uint_32;
+               break;
+       case Opt_file_mode:
+               ctx->file_mode = result.uint_32;
+               break;
+       case Opt_dirmode:
+               ctx->dir_mode = result.uint_32;
+               break;
+       case Opt_min_enc_offload:
+               ctx->min_offload = result.uint_32;
+               break;
+       case Opt_blocksize:
+               /*
+                * inode blocksize realistically should never need to be
+                * less than 16K or greater than 16M and default is 1MB.
+                * Note that small inode block sizes (e.g. 64K) can lead
+                * to very poor performance of common tools like cp and scp
+                */
+               if ((result.uint_32 < CIFS_MAX_MSGSIZE) ||
+                  (result.uint_32 > (4 * SMB3_DEFAULT_IOSIZE))) {
+                       cifs_dbg(VFS, "%s: Invalid blocksize\n",
+                               __func__);
+                       goto cifs_parse_mount_err;
+               }
+               ctx->bsize = result.uint_32;
+               ctx->got_bsize = true;
+               break;
+       case Opt_rsize:
+               ctx->rsize = result.uint_32;
+               ctx->got_rsize = true;
+               break;
+       case Opt_wsize:
+               ctx->wsize = result.uint_32;
+               ctx->got_wsize = true;
+               break;
+       case Opt_actimeo:
+               ctx->actimeo = HZ * result.uint_32;
+               if (ctx->actimeo > CIFS_MAX_ACTIMEO) {
+                       cifs_dbg(VFS, "attribute cache timeout too large\n");
+                       goto cifs_parse_mount_err;
+               }
+               break;
+       case Opt_echo_interval:
+               ctx->echo_interval = result.uint_32;
+               break;
+       case Opt_snapshot:
+               ctx->snapshot_time = result.uint_32;
+               break;
+       case Opt_max_credits:
+               if (result.uint_32 < 20 || result.uint_32 > 60000) {
+                       cifs_dbg(VFS, "%s: Invalid max_credits value\n",
+                                __func__);
+                       goto cifs_parse_mount_err;
+               }
+               ctx->max_credits = result.uint_32;
+               break;
+       case Opt_max_channels:
+               if (result.uint_32 < 1 || result.uint_32 > CIFS_MAX_CHANNELS) {
+                       cifs_dbg(VFS, "%s: Invalid max_channels value, needs to be 1-%d\n",
+                                __func__, CIFS_MAX_CHANNELS);
+                       goto cifs_parse_mount_err;
+               }
+               ctx->max_channels = result.uint_32;
+               break;
+       case Opt_handletimeout:
+               ctx->handle_timeout = result.uint_32;
+               if (ctx->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) {
+                       cifs_dbg(VFS, "Invalid handle cache timeout, longer than 16 minutes\n");
+                       goto cifs_parse_mount_err;
+               }
+               break;
+       case Opt_source:
+               kfree(ctx->UNC);
+               ctx->UNC = NULL;
+               switch (smb3_parse_devname(param->string, ctx)) {
+               case 0:
+                       break;
+               case -ENOMEM:
+                       cifs_dbg(VFS, "Unable to allocate memory for devname\n");
+                       goto cifs_parse_mount_err;
+               case -EINVAL:
+                       cifs_dbg(VFS, "Malformed UNC in devname\n");
+                       goto cifs_parse_mount_err;
+               default:
+                       cifs_dbg(VFS, "Unknown error parsing devname\n");
+                       goto cifs_parse_mount_err;
+               }
+               fc->source = kstrdup(param->string, GFP_KERNEL);
+               if (fc->source == NULL) {
+                       cifs_dbg(VFS, "OOM when copying UNC string\n");
+                       goto cifs_parse_mount_err;
+               }
+               break;
+       case Opt_user:
+               kfree(ctx->username);
+               ctx->username = NULL;
+               if (strlen(param->string) == 0) {
+                       /* null user, ie. anonymous authentication */
+                       ctx->nullauth = 1;
+                       break;
+               }
+
+               if (strnlen(param->string, CIFS_MAX_USERNAME_LEN) >
+                   CIFS_MAX_USERNAME_LEN) {
+                       pr_warn("username too long\n");
+                       goto cifs_parse_mount_err;
+               }
+               ctx->username = kstrdup(param->string, GFP_KERNEL);
+               if (ctx->username == NULL) {
+                       cifs_dbg(VFS, "OOM when copying username string\n");
+                       goto cifs_parse_mount_err;
+               }
+               break;
+       case Opt_pass:
+               kfree(ctx->password);
+               ctx->password = NULL;
+               if (strlen(param->string) == 0)
+                       break;
+
+               ctx->password = kstrdup(param->string, GFP_KERNEL);
+               if (ctx->password == NULL) {
+                       cifs_dbg(VFS, "OOM when copying password string\n");
+                       goto cifs_parse_mount_err;
+               }
+               break;
+       case Opt_ip:
+               if (strlen(param->string) == 0) {
+                       ctx->got_ip = false;
+                       break;
+               }
+               if (!cifs_convert_address((struct sockaddr *)&ctx->dstaddr,
+                                         param->string,
+                                         strlen(param->string))) {
+                       pr_err("bad ip= option (%s)\n", param->string);
+                       goto cifs_parse_mount_err;
+               }
+               ctx->got_ip = true;
+               break;
+       case Opt_domain:
+               if (strnlen(param->string, CIFS_MAX_DOMAINNAME_LEN)
+                               == CIFS_MAX_DOMAINNAME_LEN) {
+                       pr_warn("domain name too long\n");
+                       goto cifs_parse_mount_err;
+               }
+
+               kfree(ctx->domainname);
+               ctx->domainname = kstrdup(param->string, GFP_KERNEL);
+               if (ctx->domainname == NULL) {
+                       cifs_dbg(VFS, "OOM when copying domainname string\n");
+                       goto cifs_parse_mount_err;
+               }
+               cifs_dbg(FYI, "Domain name set\n");
+               break;
+       case Opt_srcaddr:
+               if (!cifs_convert_address(
+                               (struct sockaddr *)&ctx->srcaddr,
+                               param->string, strlen(param->string))) {
+                       pr_warn("Could not parse srcaddr: %s\n",
+                               param->string);
+                       goto cifs_parse_mount_err;
+               }
+               break;
+       case Opt_iocharset:
+               if (strnlen(param->string, 1024) >= 65) {
+                       pr_warn("iocharset name too long\n");
+                       goto cifs_parse_mount_err;
+               }
+
+               if (strncasecmp(param->string, "default", 7) != 0) {
+                       kfree(ctx->iocharset);
+                       ctx->iocharset = kstrdup(param->string, GFP_KERNEL);
+                       if (ctx->iocharset == NULL) {
+                               cifs_dbg(VFS, "OOM when copying iocharset string\n");
+                               goto cifs_parse_mount_err;
+                       }
+               }
+               /* if iocharset is not set then load_nls_default
+                * is used by the caller
+                */
+               cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset);
+               break;
+       case Opt_netbiosname:
+               memset(ctx->source_rfc1001_name, 0x20,
+                       RFC1001_NAME_LEN);
+               /*
+                * FIXME: are there cases in which a comma can
+                * be valid in workstation netbios name (and
+                * need special handling)?
+                */
+               for (i = 0; i < RFC1001_NAME_LEN; i++) {
+                       /* don't ucase netbiosname for user */
+                       if (param->string[i] == 0)
+                               break;
+                       ctx->source_rfc1001_name[i] = param->string[i];
+               }
+               /* if the loop consumed all 15 bytes and more input
+                * remains, the name was truncated
+                */
+               if (i == RFC1001_NAME_LEN && param->string[i] != 0)
+                       pr_warn("netbiosname longer than 15 truncated\n");
+               break;
+       case Opt_servern:
+               /* last byte, type, is 0x20 for server type */
+               memset(ctx->target_rfc1001_name, 0x20,
+                       RFC1001_NAME_LEN_WITH_NULL);
+               /*
+                * BB are there cases in which a comma can be valid in this
+                * workstation netbios name (and need special handling)?
+                */
+
+               /* user or mount helper must uppercase the netbios name */
+               for (i = 0; i < 15; i++) {
+                       if (param->string[i] == 0)
+                               break;
+                       ctx->target_rfc1001_name[i] = param->string[i];
+               }
+
+               /* if the loop consumed all 15 bytes and more input remains, the name was truncated */
+               if (i == RFC1001_NAME_LEN && param->string[i] != 0)
+                       pr_warn("server netbiosname longer than 15 truncated\n");
+               break;
+       case Opt_ver:
+               /* version of mount userspace tools, not dialect */
+               /* If interface changes in mount.cifs bump to new ver */
+               if (strncasecmp(param->string, "1", 1) == 0) {
+                       if (strlen(param->string) > 1) {
+                               pr_warn("Bad mount helper ver=%s. Did you want SMB1 (CIFS) dialect and mean to type vers=1.0 instead?\n",
+                                       param->string);
+                               goto cifs_parse_mount_err;
+                       }
+                       /* This is the default */
+                       break;
+               }
+               /* All other values are invalid */
+               pr_warn("Invalid mount helper version specified\n");
+               goto cifs_parse_mount_err;
+       case Opt_vers:
+               /* protocol version (dialect) */
+               if (cifs_parse_smb_version(param->string, ctx, is_smb3) != 0)
+                       goto cifs_parse_mount_err;
+               ctx->got_version = true;
+               break;
+       case Opt_sec:
+               if (cifs_parse_security_flavors(param->string, ctx) != 0)
+                       goto cifs_parse_mount_err;
+               break;
+       case Opt_cache:
+               if (cifs_parse_cache_flavor(param->string, ctx) != 0)
+                       goto cifs_parse_mount_err;
+               break;
+       case Opt_witness:
+#ifndef CONFIG_CIFS_SWN_UPCALL
+               cifs_dbg(VFS, "Witness support needs CONFIG_CIFS_SWN_UPCALL config option\n");
+               goto cifs_parse_mount_err;
+#endif
+               ctx->witness = true;
+               pr_warn_once("Witness protocol support is experimental\n");
+               break;
+       case Opt_rootfs:
+#ifdef CONFIG_CIFS_ROOT
+               ctx->rootfs = true;
+#endif
+               break;
+       case Opt_posixpaths:
+               if (result.negated)
+                       ctx->posix_paths = 0;
+               else
+                       ctx->posix_paths = 1;
+               break;
+       case Opt_unix:
+               if (result.negated)
+                       ctx->no_linux_ext = 1;
+               else
+                       ctx->linux_ext = 1;
+               break;
+       case Opt_nocase:
+               ctx->nocase = 1;
+               break;
+       case Opt_brl:
+               if (result.negated) {
+                       /*
+                        * turn off mandatory locking in mode
+                        * if remote locking is turned off since the
+                        * local vfs will do advisory
+                        */
+                       if (ctx->file_mode ==
+                               (S_IALLUGO & ~(S_ISUID | S_IXGRP)))
+                               ctx->file_mode = S_IALLUGO;
+                       ctx->nobrl = 1;
+               } else {
+                       ctx->nobrl = 0;
+               }
+               break;
+       case Opt_handlecache:
+               if (result.negated)
+                       ctx->nohandlecache = 1;
+               else
+                       ctx->nohandlecache = 0;
+               break;
+       case Opt_forcemandatorylock:
+               ctx->mand_lock = 1;
+               break;
+       case Opt_setuids:
+               ctx->setuids = !result.negated;
+               break;
+       case Opt_intr:
+               ctx->intr = !result.negated;
+               break;
+       case Opt_setuidfromacl:
+               ctx->setuidfromacl = 1;
+               break;
+       case Opt_strictsync:
+               ctx->nostrictsync = result.negated;
+               break;
+       case Opt_serverino:
+               ctx->server_ino = !result.negated;
+               break;
+       case Opt_rwpidforward:
+               ctx->rwpidforward = 1;
+               break;
+       case Opt_modesid:
+               ctx->mode_ace = 1;
+               break;
+       case Opt_cifsacl:
+               ctx->cifs_acl = !result.negated;
+               break;
+       case Opt_acl:
+               ctx->no_psx_acl = result.negated;
+               break;
+       case Opt_locallease:
+               ctx->local_lease = 1;
+               break;
+       case Opt_sign:
+               ctx->sign = true;
+               break;
+       case Opt_ignore_signature:
+               ctx->sign = true;
+               ctx->ignore_signature = true;
+               break;
+       case Opt_seal:
+               /* we do not do the following in secFlags because seal
+                * is a per tree connection (mount) not a per socket
+                * or per-smb connection option in the protocol
+                * vol->secFlg |= CIFSSEC_MUST_SEAL;
+                */
+               ctx->seal = 1;
+               break;
+       case Opt_noac:
+               pr_warn("Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n");
+               break;
+       case Opt_fsc:
+#ifndef CONFIG_CIFS_FSCACHE
+               cifs_dbg(VFS, "FS-Cache support needs CONFIG_CIFS_FSCACHE kernel config option set\n");
+               goto cifs_parse_mount_err;
+#endif
+               ctx->fsc = true;
+               break;
+       case Opt_mfsymlinks:
+               ctx->mfsymlinks = true;
+               break;
+       case Opt_multiuser:
+               ctx->multiuser = true;
+               break;
+       case Opt_sloppy:
+               ctx->sloppy = true;
+               break;
+       case Opt_nosharesock:
+               ctx->nosharesock = true;
+               break;
+       case Opt_persistent:
+               if (result.negated) {
+                       ctx->nopersistent = true;
+                       if (ctx->persistent) {
+                               cifs_dbg(VFS,
+                                 "persistenthandles mount options conflict\n");
+                               goto cifs_parse_mount_err;
+                       }
+               } else {
+                       ctx->persistent = true;
+                       if ((ctx->nopersistent) || (ctx->resilient)) {
+                               cifs_dbg(VFS,
+                                 "persistenthandles mount options conflict\n");
+                               goto cifs_parse_mount_err;
+                       }
+               }
+               break;
+       case Opt_resilient:
+               if (result.negated) {
+                       ctx->resilient = false; /* already the default */
+               } else {
+                       ctx->resilient = true;
+                       if (ctx->persistent) {
+                               cifs_dbg(VFS,
+                                 "persistenthandles mount options conflict\n");
+                               goto cifs_parse_mount_err;
+                       }
+               }
+               break;
+       case Opt_domainauto:
+               ctx->domainauto = true;
+               break;
+       case Opt_rdma:
+               ctx->rdma = true;
+               break;
+       }
+       /* case Opt_ignore: - is ignored as expected ... */
+
+       return 0;
+
+ cifs_parse_mount_err:
+       return 1;
+}
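+
+/*
+ * Example: a "wsize=1048576" parameter reaches the switch above as
+ * opt == Opt_wsize with result.uint_32 == 1048576, so ctx->wsize is
+ * set and ctx->got_wsize records that the value came from the user.
+ */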
+
+int smb3_init_fs_context(struct fs_context *fc)
+{
+       struct smb3_fs_context *ctx;
+       char *nodename = utsname()->nodename;
+       int i;
+
+       ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL);
+       if (unlikely(!ctx))
+               return -ENOMEM;
+
+       /*
+        * does not have to be perfect mapping since field is
+        * informational, only used for servers that do not support
+        * port 445 and it can be overridden at mount time
+        */
+       memset(ctx->source_rfc1001_name, 0x20, RFC1001_NAME_LEN);
+       for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++)
+               ctx->source_rfc1001_name[i] = toupper(nodename[i]);
+
+       ctx->source_rfc1001_name[RFC1001_NAME_LEN] = 0;
+       /*
+        * null target name indicates to use *SMBSERVER default called name
+        * if we end up sending RFC1001 session initialize
+        */
+       ctx->target_rfc1001_name[0] = 0;
+       ctx->cred_uid = current_uid();
+       ctx->linux_uid = current_uid();
+       ctx->linux_gid = current_gid();
+       ctx->bsize = 1024 * 1024; /* can improve cp performance significantly */
+
+       /*
+        * default to SFM style remapping of seven reserved characters
+        * unless user overrides it or we negotiate CIFS POSIX where
+        * it is unnecessary.  Can not simultaneously use more than one mapping
+        * since then readdir could list files that open could not open
+        */
+       ctx->remap = true;
+
+       /* default to only allowing write access to owner of the mount */
+       ctx->dir_mode = ctx->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
+
+       /* ctx->retry default is 0 (i.e. "soft" limited retry not hard retry) */
+       /* default is always to request posix paths. */
+       ctx->posix_paths = 1;
+       /* default to using server inode numbers where available */
+       ctx->server_ino = 1;
+
+       /* default is to use strict cifs caching semantics */
+       ctx->strict_io = true;
+
+       ctx->actimeo = CIFS_DEF_ACTIMEO;
+
+       /* Most clients set timeout to 0, allows server to use its default */
+       ctx->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
+
+       /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */
+       ctx->ops = &smb30_operations;
+       ctx->vals = &smbdefault_values;
+
+       ctx->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
+
+       /* default to no multichannel (single server connection) */
+       ctx->multichannel = false;
+       ctx->max_channels = 1;
+
+       ctx->backupuid_specified = false; /* no backup intent for a user */
+       ctx->backupgid_specified = false; /* no backup intent for a group */
+
+/*
+ *     short int override_uid = -1;
+ *     short int override_gid = -1;
+ *     char *nodename = strdup(utsname()->nodename);
+ *     struct sockaddr *dstaddr = (struct sockaddr *)&vol->dstaddr;
+ */
+
+       fc->fs_private = ctx;
+       fc->ops = &smb3_fs_context_ops;
+       return 0;
+}
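
A minimal sketch of how the new mount API reaches smb3_init_fs_context():
the filesystem type's init_fs_context hook points at it and the VFS calls
it once per mount, before any parameters are parsed. The cifs-specific
field values below are illustrative, not taken from this patch.

	static struct file_system_type cifs_fs_type_sketch = {
		.owner           = THIS_MODULE,
		.name            = "cifs",
		.init_fs_context = smb3_init_fs_context, /* allocates and seeds ctx */
		.kill_sb         = kill_anon_super,      /* placeholder teardown */
	};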
+
+void
+smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
+{
+       if (ctx == NULL)
+               return;
+
+       /*
+        * Make sure this stays in sync with smb3_fs_context_dup()
+        */
+       kfree(ctx->mount_options);
+       ctx->mount_options = NULL;
+       kfree(ctx->username);
+       ctx->username = NULL;
+       kfree_sensitive(ctx->password);
+       ctx->password = NULL;
+       kfree(ctx->UNC);
+       ctx->UNC = NULL;
+       kfree(ctx->domainname);
+       ctx->domainname = NULL;
+       kfree(ctx->nodename);
+       ctx->nodename = NULL;
+       kfree(ctx->iocharset);
+       ctx->iocharset = NULL;
+       kfree(ctx->prepath);
+       ctx->prepath = NULL;
+}
+
+void
+smb3_cleanup_fs_context(struct smb3_fs_context *ctx)
+{
+       if (!ctx)
+               return;
+       smb3_cleanup_fs_context_contents(ctx);
+       kfree(ctx);
+}
+
+void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb)
+{
+       struct smb3_fs_context *ctx = cifs_sb->ctx;
+
+       if (ctx->nodfs)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_DFS;
+
+       if (ctx->noperm)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_PERM;
+
+       if (ctx->setuids)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SET_UID;
+
+       if (ctx->setuidfromacl)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UID_FROM_ACL;
+
+       if (ctx->server_ino)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
+
+       if (ctx->remap)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SFM_CHR;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SFM_CHR;
+
+       if (ctx->sfu_remap)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SPECIAL_CHR;
+
+       if (ctx->no_xattr)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_XATTR;
+
+       if (ctx->sfu_emul)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UNX_EMUL;
+
+       if (ctx->nobrl)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_BRL;
+
+       if (ctx->nohandlecache)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_HANDLE_CACHE;
+
+       if (ctx->nostrictsync)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOSSYNC;
+
+       if (ctx->mand_lock)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOPOSIXBRL;
+
+       if (ctx->rwpidforward)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_RWPIDFORWARD;
+
+       if (ctx->mode_ace)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MODE_FROM_SID;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MODE_FROM_SID;
+
+       if (ctx->cifs_acl)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_ACL;
+
+       if (ctx->backupuid_specified)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPUID;
+
+       if (ctx->backupgid_specified)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPGID;
+
+       if (ctx->override_uid)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_UID;
+
+       if (ctx->override_gid)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_GID;
+
+       if (ctx->dynperm)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DYNPERM;
+
+       if (ctx->fsc)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_FSCACHE;
+
+       if (ctx->multiuser)
+               cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER |
+                                           CIFS_MOUNT_NO_PERM);
+       else
+               cifs_sb->mnt_cifs_flags &= ~(CIFS_MOUNT_MULTIUSER |
+                                            CIFS_MOUNT_NO_PERM);
+
+       if (ctx->strict_io)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_STRICT_IO;
+
+       if (ctx->direct_io)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DIRECT_IO;
+
+       if (ctx->mfsymlinks)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS;
+       else
+               cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MF_SYMLINKS;
+       if (ctx->mfsymlinks) {
+               if (ctx->sfu_emul) {
+                       /*
+                        * Our SFU ("Services for Unix") emulation does not allow
+                        * creating symlinks but does allow reading existing SFU
+                        * symlinks (it does allow both creating and reading SFU
+                        * style mknod and FIFOs though). When "mfsymlinks" and
+                        * "sfu" are both enabled at the same time, it allows
+                        * reading both types of symlinks, but will only create
+                        * them with mfsymlinks format. This allows better
+                        * Apple compatibility (probably better for Samba too)
+                        * while still recognizing old Windows style symlinks.
+                        */
+                       cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n");
+               }
+       }
+
+       return;
+}
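
Every pair above follows the same set-or-clear shape, so the translation
could be expressed once with a small helper. A sketch; the helper name is
hypothetical and not part of this patch:

	static inline void smb3_set_mnt_flag(struct cifs_sb_info *cifs_sb,
					     bool enabled, unsigned int flag)
	{
		if (enabled)
			cifs_sb->mnt_cifs_flags |= flag;   /* option requested */
		else
			cifs_sb->mnt_cifs_flags &= ~flag;  /* absent or negated */
	}

	/* e.g. smb3_set_mnt_flag(cifs_sb, ctx->nodfs, CIFS_MOUNT_NO_DFS); */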
index 886208a..3358b33 100644 (file)
@@ -9,8 +9,11 @@
 #ifndef _FS_CONTEXT_H
 #define _FS_CONTEXT_H
 
-#include <linux/parser.h>
 #include "cifsglob.h"
+#include <linux/parser.h>
+#include <linux/fs_parser.h>
+
+#define cifs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__)
 
 enum smb_version {
        Smb_1 = 1,
@@ -24,8 +27,6 @@ enum smb_version {
        Smb_version_err
 };
 
-int cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3);
-
 enum {
        Opt_cache_loose,
        Opt_cache_strict,
@@ -35,8 +36,6 @@ enum {
        Opt_cache_err
 };
 
-int cifs_parse_cache_flavor(char *value, struct smb_vol *vol);
-
 enum cifs_sec_param {
        Opt_sec_krb5,
        Opt_sec_krb5i,
@@ -53,6 +52,220 @@ enum cifs_sec_param {
        Opt_sec_err
 };
 
-int cifs_parse_security_flavors(char *value, struct smb_vol *vol);
+enum cifs_param {
+       /* Mount options that take no arguments */
+       Opt_user_xattr,
+       Opt_forceuid,
+       Opt_forcegid,
+       Opt_noblocksend,
+       Opt_noautotune,
+       Opt_nolease,
+       Opt_hard,
+       Opt_soft,
+       Opt_perm,
+       Opt_nodelete,
+       Opt_mapposix,
+       Opt_mapchars,
+       Opt_nomapchars,
+       Opt_sfu,
+       Opt_nodfs,
+       Opt_posixpaths,
+       Opt_unix,
+       Opt_nocase,
+       Opt_brl,
+       Opt_handlecache,
+       Opt_forcemandatorylock,
+       Opt_setuidfromacl,
+       Opt_setuids,
+       Opt_dynperm,
+       Opt_intr,
+       Opt_strictsync,
+       Opt_serverino,
+       Opt_rwpidforward,
+       Opt_cifsacl,
+       Opt_acl,
+       Opt_locallease,
+       Opt_sign,
+       Opt_ignore_signature,
+       Opt_seal,
+       Opt_noac,
+       Opt_fsc,
+       Opt_mfsymlinks,
+       Opt_multiuser,
+       Opt_sloppy,
+       Opt_nosharesock,
+       Opt_persistent,
+       Opt_resilient,
+       Opt_domainauto,
+       Opt_rdma,
+       Opt_modesid,
+       Opt_rootfs,
+       Opt_multichannel,
+       Opt_compress,
+       Opt_witness,
+
+       /* Mount options which take numeric value */
+       Opt_backupuid,
+       Opt_backupgid,
+       Opt_uid,
+       Opt_cruid,
+       Opt_gid,
+       Opt_port,
+       Opt_file_mode,
+       Opt_dirmode,
+       Opt_min_enc_offload,
+       Opt_blocksize,
+       Opt_rsize,
+       Opt_wsize,
+       Opt_actimeo,
+       Opt_echo_interval,
+       Opt_max_credits,
+       Opt_snapshot,
+       Opt_max_channels,
+       Opt_handletimeout,
+
+       /* Mount options which take string value */
+       Opt_source,
+       Opt_user,
+       Opt_pass,
+       Opt_ip,
+       Opt_domain,
+       Opt_srcaddr,
+       Opt_iocharset,
+       Opt_netbiosname,
+       Opt_servern,
+       Opt_ver,
+       Opt_vers,
+       Opt_sec,
+       Opt_cache,
+
+       /* Mount options to be ignored */
+       Opt_ignore,
+
+       Opt_err
+};
+
+struct smb3_fs_context {
+       bool uid_specified;
+       bool gid_specified;
+       bool sloppy;
+       bool got_ip;
+       bool got_version;
+       bool got_rsize;
+       bool got_wsize;
+       bool got_bsize;
+       unsigned short port;
+
+       char *username;
+       char *password;
+       char *domainname;
+       char *UNC;
+       char *nodename;
+       char *iocharset;  /* local code page for mapping to and from Unicode */
+       char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
+       char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
+       kuid_t cred_uid;
+       kuid_t linux_uid;
+       kgid_t linux_gid;
+       kuid_t backupuid;
+       kgid_t backupgid;
+       umode_t file_mode;
+       umode_t dir_mode;
+       enum securityEnum sectype; /* sectype requested via mnt opts */
+       bool sign; /* was signing requested via mnt opts? */
+       bool ignore_signature:1;
+       bool retry:1;
+       bool intr:1;
+       bool setuids:1;
+       bool setuidfromacl:1;
+       bool override_uid:1;
+       bool override_gid:1;
+       bool dynperm:1;
+       bool noperm:1;
+       bool nodelete:1;
+       bool mode_ace:1;
+       bool no_psx_acl:1; /* set if posix acl support should be disabled */
+       bool cifs_acl:1;
+       bool backupuid_specified; /* mount option  backupuid  is specified */
+       bool backupgid_specified; /* mount option  backupgid  is specified */
+       bool no_xattr:1;   /* set if xattr (EA) support should be disabled */
+       bool server_ino:1; /* use inode numbers from server ie UniqueId */
+       bool direct_io:1;
+       bool strict_io:1; /* strict cache behavior */
+       bool cache_ro:1;
+       bool cache_rw:1;
+       bool remap:1;      /* set to remap seven reserved chars in filenames */
+       bool sfu_remap:1;  /* remap seven reserved chars ala SFU */
+       bool posix_paths:1; /* unset to not ask for posix pathnames. */
+       bool no_linux_ext:1;
+       bool linux_ext:1;
+       bool sfu_emul:1;
+       bool nullauth:1;   /* attempt to authenticate with null user */
+       bool nocase:1;     /* request case insensitive filenames */
+       bool nobrl:1;      /* disable sending byte range locks to srv */
+       bool nohandlecache:1; /* disable caching dir handles if srvr probs */
+       bool mand_lock:1;  /* send mandatory not posix byte range lock reqs */
+       bool seal:1;       /* request transport encryption on share */
+       bool nodfs:1;      /* Do not request DFS, even if available */
+       bool local_lease:1; /* check leases only on local system, not remote */
+       bool noblocksnd:1;
+       bool noautotune:1;
+       bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
+       bool no_lease:1;     /* disable requesting leases */
+       bool fsc:1;     /* enable fscache */
+       bool mfsymlinks:1; /* use Minshall+French Symlinks */
+       bool multiuser:1;
+       bool rwpidforward:1; /* pid forward for read/write operations */
+       bool nosharesock:1;
+       bool persistent:1;
+       bool nopersistent:1;
+       bool resilient:1; /* noresilient not required since not forced for CA */
+       bool domainauto:1;
+       bool rdma:1;
+       bool multichannel:1;
+       bool use_client_guid:1;
+       /* reuse existing guid for multichannel */
+       u8 client_guid[SMB2_CLIENT_GUID_SIZE];
+       unsigned int bsize;
+       unsigned int rsize;
+       unsigned int wsize;
+       unsigned int min_offload;
+       bool sockopt_tcp_nodelay:1;
+       unsigned long actimeo; /* attribute cache timeout (jiffies) */
+       struct smb_version_operations *ops;
+       struct smb_version_values *vals;
+       char *prepath;
+       struct sockaddr_storage dstaddr; /* destination address */
+       struct sockaddr_storage srcaddr; /* allow binding to a local IP */
+       struct nls_table *local_nls; /* This is a copy of the pointer in cifs_sb */
+       unsigned int echo_interval; /* echo interval in secs */
+       __u64 snapshot_time; /* needed for timewarp tokens */
+       __u32 handle_timeout; /* persistent and durable handle timeout in ms */
+       unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
+       unsigned int max_channels;
+       __u16 compression; /* compression algorithm 0xFFFF default 0=disabled */
+       bool rootfs:1; /* if it's a SMB root file system */
+       bool witness:1; /* use witness protocol */
+
+       char *mount_options;
+};
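
The struct is also usable as a short-lived stack value: the multichannel
code later in this patch (cifs_ses_add_channel) does exactly that,
borrowing strings owned by the session instead of duplicating them. A
condensed sketch, assuming a session pointer ses in scope:

	struct smb3_fs_context ctx = {NULL};

	ctx.nosharesock = true;           /* always open a fresh connection */
	ctx.username    = ses->user_name; /* borrowed pointer, not owned */
	ctx.password    = ses->password;  /* likewise: never kfree'd here */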
+
+extern const struct fs_parameter_spec smb3_fs_parameters[];
+
+extern int cifs_parse_cache_flavor(char *value,
+                                  struct smb3_fs_context *ctx);
+extern int cifs_parse_security_flavors(char *value,
+                                      struct smb3_fs_context *ctx);
+extern int smb3_init_fs_context(struct fs_context *fc);
+extern void smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx);
+extern void smb3_cleanup_fs_context(struct smb3_fs_context *ctx);
+
+static inline struct smb3_fs_context *smb3_fc2context(const struct fs_context *fc)
+{
+       return fc->fs_private;
+}
+
+extern int smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx);
+extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
 
 #endif
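
For context, a hedged sketch of how a fs_parameter handler would recover
the cifs-private data through smb3_fc2context(); the handler name and the
option it sets are hypothetical:

	static int smb3_example_parse_param(struct fs_context *fc,
					    struct fs_parameter *param)
	{
		struct smb3_fs_context *ctx = smb3_fc2context(fc);

		/* fc->fs_private was installed by smb3_init_fs_context() */
		ctx->sloppy = true; /* illustrative: tolerate unknown options */
		return 0;
	}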
index da68818..20d24af 100644 (file)
@@ -22,6 +22,7 @@
 #include "cifsglob.h"
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
+#include "cifsproto.h"
 
 /*
  * Key layout of CIFS server cache index object
index 1091633..e811f2d 100644 (file)
@@ -57,7 +57,6 @@ extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
 
 extern int cifs_fscache_register(void);
 extern void cifs_fscache_unregister(void);
-extern char *extract_sharename(const char *);
 
 /*
  * fscache.c
index 9ee5f30..a83b3a8 100644 (file)
@@ -37,6 +37,7 @@
 #include "cifs_fs_sb.h"
 #include "cifs_unicode.h"
 #include "fscache.h"
+#include "fs_context.h"
 
 
 static void cifs_set_ops(struct inode *inode)
@@ -294,7 +295,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
                break;
        }
 
-       fattr->cf_uid = cifs_sb->mnt_uid;
+       fattr->cf_uid = cifs_sb->ctx->linux_uid;
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) {
                u64 id = le64_to_cpu(info->Uid);
                if (id < ((uid_t)-1)) {
@@ -304,7 +305,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
                }
        }
        
-       fattr->cf_gid = cifs_sb->mnt_gid;
+       fattr->cf_gid = cifs_sb->ctx->linux_gid;
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) {
                u64 id = le64_to_cpu(info->Gid);
                if (id < ((gid_t)-1)) {
@@ -333,8 +334,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
 
        memset(fattr, 0, sizeof(*fattr));
        fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
-       fattr->cf_uid = cifs_sb->mnt_uid;
-       fattr->cf_gid = cifs_sb->mnt_gid;
+       fattr->cf_uid = cifs_sb->ctx->linux_uid;
+       fattr->cf_gid = cifs_sb->ctx->linux_gid;
        ktime_get_coarse_real_ts64(&fattr->cf_mtime);
        fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime;
        fattr->cf_nlink = 2;
@@ -644,8 +645,8 @@ smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct smb311_posix_qinfo *
        }
        /* else if reparse point ... TODO: add support for FIFO and blk dev; special file types */
 
-       fattr->cf_uid = cifs_sb->mnt_uid; /* TODO: map uid and gid from SID */
-       fattr->cf_gid = cifs_sb->mnt_gid;
+       fattr->cf_uid = cifs_sb->ctx->linux_uid; /* TODO: map uid and gid from SID */
+       fattr->cf_gid = cifs_sb->ctx->linux_gid;
 
        cifs_dbg(FYI, "POSIX query info: mode 0x%x uniqueid 0x%llx nlink %d\n",
                fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink);
@@ -685,25 +686,25 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 
        fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
        if (reparse_tag == IO_REPARSE_TAG_LX_SYMLINK) {
-               fattr->cf_mode |= S_IFLNK | cifs_sb->mnt_file_mode;
+               fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_LNK;
        } else if (reparse_tag == IO_REPARSE_TAG_LX_FIFO) {
-               fattr->cf_mode |= S_IFIFO | cifs_sb->mnt_file_mode;
+               fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_FIFO;
        } else if (reparse_tag == IO_REPARSE_TAG_AF_UNIX) {
-               fattr->cf_mode |= S_IFSOCK | cifs_sb->mnt_file_mode;
+               fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_SOCK;
        } else if (reparse_tag == IO_REPARSE_TAG_LX_CHR) {
-               fattr->cf_mode |= S_IFCHR | cifs_sb->mnt_file_mode;
+               fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_CHR;
        } else if (reparse_tag == IO_REPARSE_TAG_LX_BLK) {
-               fattr->cf_mode |= S_IFBLK | cifs_sb->mnt_file_mode;
+               fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_BLK;
        } else if (symlink) { /* TODO add more reparse tag checks */
                fattr->cf_mode = S_IFLNK;
                fattr->cf_dtype = DT_LNK;
        } else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
-               fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
+               fattr->cf_mode = S_IFDIR | cifs_sb->ctx->dir_mode;
                fattr->cf_dtype = DT_DIR;
                /*
                 * Server can return wrong NumberOfLinks value for directories
@@ -712,7 +713,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
                if (!tcon->unix_ext)
                        fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
        } else {
-               fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
+               fattr->cf_mode = S_IFREG | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_REG;
 
                /* clear write bits if ATTR_READONLY is set */
@@ -731,8 +732,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
                }
        }
 
-       fattr->cf_uid = cifs_sb->mnt_uid;
-       fattr->cf_gid = cifs_sb->mnt_gid;
+       fattr->cf_uid = cifs_sb->ctx->linux_uid;
+       fattr->cf_gid = cifs_sb->ctx->linux_gid;
 }
 
 static int
@@ -771,6 +772,7 @@ cifs_get_file_info(struct file *filp)
                 */
                rc = 0;
                CIFS_I(inode)->time = 0;
+               goto cgfi_exit;
        default:
                goto cgfi_exit;
        }
@@ -803,11 +805,15 @@ static __u64 simple_hashstr(const char *str)
  * cifs_backup_query_path_info - SMB1 fallback code to get ino
  *
  * Fallback code to get file metadata when we don't have access to
- * @full_path (EACCES) and have backup creds.
+ * full_path (EACCES) and have backup creds.
  *
- * @data will be set to search info result buffer
- * @resp_buf will be set to cifs resp buf and needs to be freed with
- * cifs_buf_release() when done with @data.
+ * @xid:       transaction id used to identify original request in logs
+ * @tcon:      information about the server share we have mounted
+ * @sb:        the superblock stores info such as disk space available
+ * @full_path: name of the file we are getting the metadata for
+ * @resp_buf:  will be set to cifs resp buf and needs to be freed with
+ *             cifs_buf_release() when done with @data
+ * @data:      will be set to search info result buffer
  */
 static int
 cifs_backup_query_path_info(int xid,
@@ -1386,8 +1392,8 @@ iget_no_retry:
                set_nlink(inode, 2);
                inode->i_op = &cifs_ipc_inode_ops;
                inode->i_fop = &simple_dir_operations;
-               inode->i_uid = cifs_sb->mnt_uid;
-               inode->i_gid = cifs_sb->mnt_gid;
+               inode->i_uid = cifs_sb->ctx->linux_uid;
+               inode->i_gid = cifs_sb->ctx->linux_gid;
                spin_unlock(&inode->i_lock);
        } else if (rc) {
                iget_failed(inode);
@@ -2192,11 +2198,11 @@ cifs_inode_needs_reval(struct inode *inode)
        if (!lookupCacheEnabled)
                return true;
 
-       if (!cifs_sb->actimeo)
+       if (!cifs_sb->ctx->actimeo)
                return true;
 
        if (!time_in_range(jiffies, cifs_i->time,
-                               cifs_i->time + cifs_sb->actimeo))
+                               cifs_i->time + cifs_sb->ctx->actimeo))
                return true;
 
        /* hardlinked files w/ noserverino get "special" treatment */
@@ -2228,7 +2234,9 @@ cifs_invalidate_mapping(struct inode *inode)
 
 /**
  * cifs_wait_bit_killable - helper for functions that are sleeping on bit locks
- * @word: long word containing the bit lock
+ *
+ * @key:       currently unused
+ * @mode:      the task state to sleep in
  */
 static int
 cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
@@ -2401,7 +2409,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
        }
 
        generic_fillattr(inode, stat);
-       stat->blksize = cifs_sb->bsize;
+       stat->blksize = cifs_sb->ctx->bsize;
        stat->ino = CIFS_I(inode)->uniqueid;
 
        /* old CIFS Unix Extensions doesn't return create time */
@@ -2812,7 +2820,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
        if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) ||
            (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) {
                if (uid_valid(uid) || gid_valid(gid)) {
-                       rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64,
+                       mode = NO_CHANGE_64;
+                       rc = id_mode_to_cifs_acl(inode, full_path, &mode,
                                                        uid, gid);
                        if (rc) {
                                cifs_dbg(FYI, "%s: Setting id failed with error: %d\n",
@@ -2833,13 +2842,20 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
                rc = 0;
                if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) ||
                    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) {
-                       rc = id_mode_to_cifs_acl(inode, full_path, mode,
+                       rc = id_mode_to_cifs_acl(inode, full_path, &mode,
                                                INVALID_UID, INVALID_GID);
                        if (rc) {
                                cifs_dbg(FYI, "%s: Setting ACL failed with error: %d\n",
                                         __func__, rc);
                                goto cifs_setattr_exit;
                        }
+
+                       /*
+                        * In case of CIFS_MOUNT_CIFS_ACL, we cannot support all modes.
+                        * Pick up the actual mode bits that were set.
+                        */
+                       if (mode != attrs->ia_mode)
+                               attrs->ia_mode = mode;
                } else
                if (((mode & S_IWUGO) == 0) &&
                    (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
@@ -2862,10 +2878,10 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
                                attrs->ia_mode &= ~(S_IALLUGO);
                                if (S_ISDIR(inode->i_mode))
                                        attrs->ia_mode |=
-                                               cifs_sb->mnt_dir_mode;
+                                               cifs_sb->ctx->dir_mode;
                                else
                                        attrs->ia_mode |=
-                                               cifs_sb->mnt_file_mode;
+                                               cifs_sb->ctx->file_mode;
                        }
                } else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
                        /* ignore mode change - ATTR_READONLY hasn't changed */
index 1c14cf0..82e1767 100644 (file)
@@ -35,6 +35,7 @@
 #ifdef CONFIG_CIFS_DFS_UPCALL
 #include "dns_resolve.h"
 #endif
+#include "fs_context.h"
 
 extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
@@ -632,11 +633,11 @@ bool
 backup_cred(struct cifs_sb_info *cifs_sb)
 {
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) {
-               if (uid_eq(cifs_sb->mnt_backupuid, current_fsuid()))
+               if (uid_eq(cifs_sb->ctx->backupuid, current_fsuid()))
                        return true;
        }
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) {
-               if (in_group_p(cifs_sb->mnt_backupgid))
+               if (in_group_p(cifs_sb->ctx->backupgid))
                        return true;
        }
 
diff --git a/fs/cifs/netlink.c b/fs/cifs/netlink.c
new file mode 100644 (file)
index 0000000..5aaabe4
--- /dev/null
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Netlink routines for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#include <net/genetlink.h>
+#include <uapi/linux/cifs/cifs_netlink.h>
+
+#include "netlink.h"
+#include "cifsglob.h"
+#include "cifs_debug.h"
+#include "cifs_swn.h"
+
+static const struct nla_policy cifs_genl_policy[CIFS_GENL_ATTR_MAX + 1] = {
+       [CIFS_GENL_ATTR_SWN_REGISTRATION_ID]    = { .type = NLA_U32 },
+       [CIFS_GENL_ATTR_SWN_NET_NAME]           = { .type = NLA_STRING },
+       [CIFS_GENL_ATTR_SWN_SHARE_NAME]         = { .type = NLA_STRING },
+       [CIFS_GENL_ATTR_SWN_IP]                 = { .len = sizeof(struct sockaddr_storage) },
+       [CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY]    = { .type = NLA_FLAG },
+       [CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY]  = { .type = NLA_FLAG },
+       [CIFS_GENL_ATTR_SWN_IP_NOTIFY]          = { .type = NLA_FLAG },
+       [CIFS_GENL_ATTR_SWN_KRB_AUTH]           = { .type = NLA_FLAG },
+       [CIFS_GENL_ATTR_SWN_USER_NAME]          = { .type = NLA_STRING },
+       [CIFS_GENL_ATTR_SWN_PASSWORD]           = { .type = NLA_STRING },
+       [CIFS_GENL_ATTR_SWN_DOMAIN_NAME]        = { .type = NLA_STRING },
+       [CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]  = { .type = NLA_U32 },
+       [CIFS_GENL_ATTR_SWN_RESOURCE_STATE]     = { .type = NLA_U32 },
+       [CIFS_GENL_ATTR_SWN_RESOURCE_NAME]      = { .type = NLA_STRING},
+};
+
+static struct genl_ops cifs_genl_ops[] = {
+       {
+               .cmd = CIFS_GENL_CMD_SWN_NOTIFY,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit = cifs_swn_notify,
+       },
+};
+
+static const struct genl_multicast_group cifs_genl_mcgrps[] = {
+       [CIFS_GENL_MCGRP_SWN] = { .name = CIFS_GENL_MCGRP_SWN_NAME },
+};
+
+struct genl_family cifs_genl_family = {
+       .name           = CIFS_GENL_NAME,
+       .version        = CIFS_GENL_VERSION,
+       .hdrsize        = 0,
+       .maxattr        = CIFS_GENL_ATTR_MAX,
+       .module         = THIS_MODULE,
+       .policy         = cifs_genl_policy,
+       .ops            = cifs_genl_ops,
+       .n_ops          = ARRAY_SIZE(cifs_genl_ops),
+       .mcgrps         = cifs_genl_mcgrps,
+       .n_mcgrps       = ARRAY_SIZE(cifs_genl_mcgrps),
+};
+
+/**
+ * cifs_genl_init - Register generic netlink family
+ *
+ * Return zero if initialized successfully, otherwise non-zero.
+ */
+int cifs_genl_init(void)
+{
+       int ret;
+
+       ret = genl_register_family(&cifs_genl_family);
+       if (ret < 0) {
+               cifs_dbg(VFS, "%s: failed to register netlink family\n",
+                               __func__);
+               return ret;
+       }
+
+       return 0;
+}
+
+/**
+ * cifs_genl_exit - Unregister generic netlink family
+ */
+void cifs_genl_exit(void)
+{
+       int ret;
+
+       ret = genl_unregister_family(&cifs_genl_family);
+       if (ret < 0) {
+               cifs_dbg(VFS, "%s: failed to unregister netlink family\n",
+                               __func__);
+       }
+}
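
For illustration, a sketch of how the kernel side could multicast an SWN
event to userspace listeners of CIFS_GENL_MCGRP_SWN; the attribute payload
is illustrative and the function name hypothetical:

	static int cifs_swn_notify_example(void)
	{
		struct sk_buff *skb;
		void *hdr;

		skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
		if (!skb)
			return -ENOMEM;

		hdr = genlmsg_put(skb, 0, 0, &cifs_genl_family, 0,
				  CIFS_GENL_CMD_SWN_NOTIFY);
		if (!hdr) {
			nlmsg_free(skb);
			return -ENOMEM;
		}
		if (nla_put_u32(skb, CIFS_GENL_ATTR_SWN_REGISTRATION_ID, 1)) {
			genlmsg_cancel(skb, hdr);
			nlmsg_free(skb);
			return -EMSGSIZE;
		}
		genlmsg_end(skb, hdr);
		return genlmsg_multicast(&cifs_genl_family, skb, 0,
					 CIFS_GENL_MCGRP_SWN, GFP_KERNEL);
	}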
diff --git a/fs/cifs/netlink.h b/fs/cifs/netlink.h
new file mode 100644 (file)
index 0000000..e2fa8ed
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Netlink routines for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#ifndef _CIFS_NETLINK_H
+#define _CIFS_NETLINK_H
+
+extern struct genl_family cifs_genl_family;
+
+extern int cifs_genl_init(void);
+extern void cifs_genl_exit(void);
+
+#endif /* _CIFS_NETLINK_H */
index 799be3a..80bf4c6 100644 (file)
@@ -33,6 +33,7 @@
 #include "cifs_fs_sb.h"
 #include "cifsfs.h"
 #include "smb2proto.h"
+#include "fs_context.h"
 
 /*
  * To be safe - for UCS to UTF-8 with strings loaded with the rare long
@@ -165,8 +166,8 @@ static bool reparse_file_needs_reval(const struct cifs_fattr *fattr)
 static void
 cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
 {
-       fattr->cf_uid = cifs_sb->mnt_uid;
-       fattr->cf_gid = cifs_sb->mnt_gid;
+       fattr->cf_uid = cifs_sb->ctx->linux_uid;
+       fattr->cf_gid = cifs_sb->ctx->linux_gid;
 
        /*
         * The IO_REPARSE_TAG_LX_ tags originally were used by WSL but they
@@ -177,25 +178,25 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
         * reasonably map some of them to directories vs. files vs. symlinks
         */
        if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
-               fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
+               fattr->cf_mode = S_IFDIR | cifs_sb->ctx->dir_mode;
                fattr->cf_dtype = DT_DIR;
        } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_SYMLINK) {
-               fattr->cf_mode |= S_IFLNK | cifs_sb->mnt_file_mode;
+               fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_LNK;
        } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_FIFO) {
-               fattr->cf_mode |= S_IFIFO | cifs_sb->mnt_file_mode;
+               fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_FIFO;
        } else if (fattr->cf_cifstag == IO_REPARSE_TAG_AF_UNIX) {
-               fattr->cf_mode |= S_IFSOCK | cifs_sb->mnt_file_mode;
+               fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_SOCK;
        } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_CHR) {
-               fattr->cf_mode |= S_IFCHR | cifs_sb->mnt_file_mode;
+               fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_CHR;
        } else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_BLK) {
-               fattr->cf_mode |= S_IFBLK | cifs_sb->mnt_file_mode;
+               fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_BLK;
        } else { /* TODO: should we mark some other reparse points (like DFSR) as directories? */
-               fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
+               fattr->cf_mode = S_IFREG | cifs_sb->ctx->file_mode;
                fattr->cf_dtype = DT_REG;
        }
 
index de56436..2134657 100644 (file)
 #include <linux/slab.h>
 #include "cifs_spnego.h"
 #include "smb2proto.h"
+#include "fs_context.h"
+
+static int
+cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+                    struct cifs_server_iface *iface);
 
 bool
 is_server_using_iface(struct TCP_Server_Info *server,
@@ -70,7 +75,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface)
 }
 
 /* returns number of channels added */
-int cifs_try_adding_channels(struct cifs_ses *ses)
+int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 {
        int old_chan_count = ses->chan_count;
        int left = ses->chan_max - ses->chan_count;
@@ -133,7 +138,7 @@ int cifs_try_adding_channels(struct cifs_ses *ses)
                        continue;
                }
 
-               rc = cifs_ses_add_channel(ses, iface);
+               rc = cifs_ses_add_channel(cifs_sb, ses, iface);
                if (rc) {
                        cifs_dbg(FYI, "failed to open extra channel on iface#%d rc=%d\n",
                                 i, rc);
@@ -166,11 +171,12 @@ cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server)
        return NULL;
 }
 
-int
-cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface)
+static int
+cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+                    struct cifs_server_iface *iface)
 {
        struct cifs_chan *chan;
-       struct smb_vol vol = {NULL};
+       struct smb3_fs_context ctx = {NULL};
        static const char unc_fmt[] = "\\%s\\foo";
        char unc[sizeof(unc_fmt)+SERVER_NAME_LEN_WITH_NULL] = {0};
        struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr;
@@ -188,67 +194,62 @@ cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface)
                         &ipv6->sin6_addr);
 
        /*
-        * Setup a smb_vol with mostly the same info as the existing
+        * Setup a ctx with mostly the same info as the existing
         * session and overwrite it with the requested iface data.
         *
         * We need to setup at least the fields used for negprot and
         * sesssetup.
         *
-        * We only need the volume here, so we can reuse memory from
+        * We only need the ctx here, so we can reuse memory from
         * the session and server without caring about memory
         * management.
         */
 
        /* Always make new connection for now (TODO?) */
-       vol.nosharesock = true;
+       ctx.nosharesock = true;
 
        /* Auth */
-       vol.domainauto = ses->domainAuto;
-       vol.domainname = ses->domainName;
-       vol.username = ses->user_name;
-       vol.password = ses->password;
-       vol.sectype = ses->sectype;
-       vol.sign = ses->sign;
+       ctx.domainauto = ses->domainAuto;
+       ctx.domainname = ses->domainName;
+       ctx.username = ses->user_name;
+       ctx.password = ses->password;
+       ctx.sectype = ses->sectype;
+       ctx.sign = ses->sign;
 
        /* UNC and paths */
        /* XXX: Use ses->server->hostname? */
        sprintf(unc, unc_fmt, ses->serverName);
-       vol.UNC = unc;
-       vol.prepath = "";
+       ctx.UNC = unc;
+       ctx.prepath = "";
 
        /* Reuse same version as master connection */
-       vol.vals = ses->server->vals;
-       vol.ops = ses->server->ops;
+       ctx.vals = ses->server->vals;
+       ctx.ops = ses->server->ops;
 
-       vol.noblocksnd = ses->server->noblocksnd;
-       vol.noautotune = ses->server->noautotune;
-       vol.sockopt_tcp_nodelay = ses->server->tcp_nodelay;
-       vol.echo_interval = ses->server->echo_interval / HZ;
+       ctx.noblocksnd = ses->server->noblocksnd;
+       ctx.noautotune = ses->server->noautotune;
+       ctx.sockopt_tcp_nodelay = ses->server->tcp_nodelay;
+       ctx.echo_interval = ses->server->echo_interval / HZ;
 
        /*
         * This will be used for encoding/decoding user/domain/pw
         * during sess setup auth.
-        *
-        * XXX: We use the default for simplicity but the proper way
-        * would be to use the one that ses used, which is not
-        * stored. This might break when dealing with non-ascii
-        * strings.
         */
-       vol.local_nls = load_nls_default();
+       ctx.local_nls = cifs_sb->local_nls;
 
        /* Use RDMA if possible */
-       vol.rdma = iface->rdma_capable;
-       memcpy(&vol.dstaddr, &iface->sockaddr, sizeof(struct sockaddr_storage));
+       ctx.rdma = iface->rdma_capable;
+       memcpy(&ctx.dstaddr, &iface->sockaddr, sizeof(struct sockaddr_storage));
 
        /* reuse master con client guid */
-       memcpy(&vol.client_guid, ses->server->client_guid,
+       memcpy(&ctx.client_guid, ses->server->client_guid,
               SMB2_CLIENT_GUID_SIZE);
-       vol.use_client_guid = true;
+       ctx.use_client_guid = true;
 
        mutex_lock(&ses->session_mutex);
 
        chan = ses->binding_chan = &ses->chans[ses->chan_count];
-       chan->server = cifs_get_tcp_session(&vol);
+       chan->server = cifs_get_tcp_session(&ctx);
        if (IS_ERR(chan->server)) {
                rc = PTR_ERR(chan->server);
                chan->server = NULL;
@@ -274,7 +275,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface)
        if (rc)
                goto out;
 
-       rc = cifs_setup_session(xid, ses, vol.local_nls);
+       rc = cifs_setup_session(xid, ses, cifs_sb->local_nls);
        if (rc)
                goto out;
 
@@ -297,7 +298,6 @@ out:
 
        if (rc && chan->server)
                cifs_put_tcp_session(chan->server, 0);
-       unload_nls(vol.local_nls);
 
        return rc;
 }
@@ -812,6 +812,7 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
                                return NTLMv2;
                        if (global_secflags & CIFSSEC_MAY_NTLM)
                                return NTLM;
+                       break;
                default:
                        break;
                }
index 80287c2..e31b939 100644 (file)
@@ -12,6 +12,7 @@
 #include "cifs_debug.h"
 #include "cifspdu.h"
 #include "cifs_unicode.h"
+#include "fs_context.h"
 
 /*
  * An NT cancel request header looks just like the original request except:
@@ -428,15 +429,15 @@ cifs_negotiate(const unsigned int xid, struct cifs_ses *ses)
 }
 
 static unsigned int
-cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
        __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
        struct TCP_Server_Info *server = tcon->ses->server;
        unsigned int wsize;
 
        /* start with specified wsize, or default */
-       if (volume_info->wsize)
-               wsize = volume_info->wsize;
+       if (ctx->wsize)
+               wsize = ctx->wsize;
        else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
                wsize = CIFS_DEFAULT_IOSIZE;
        else
@@ -463,7 +464,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 }
 
 static unsigned int
-cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
        __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
        struct TCP_Server_Info *server = tcon->ses->server;
@@ -488,7 +489,7 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
        else
                defsize = server->maxBuf - sizeof(READ_RSP);
 
-       rsize = volume_info->rsize ? volume_info->rsize : defsize;
+       rsize = ctx->rsize ? ctx->rsize : defsize;
 
        /*
         * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to
@@ -1005,7 +1006,7 @@ cifs_is_read_op(__u32 oplock)
 static unsigned int
 cifs_wp_retry_size(struct inode *inode)
 {
-       return CIFS_SB(inode->i_sb)->wsize;
+       return CIFS_SB(inode->i_sb)->ctx->wsize;
 }
 
 static bool
index d88e268..60d4bd1 100644 (file)
@@ -94,6 +94,8 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
        /* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
 };
 
+#define SMB311_NEGPROT_BASE_SIZE (sizeof(struct smb2_sync_hdr) + sizeof(struct smb2_negotiate_rsp))
+
 static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
                              __u32 non_ctxlen)
 {
@@ -107,13 +109,28 @@ static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
           (pneg_rsp->DialectRevision != cpu_to_le16(SMB311_PROT_ID)))
                return 0;
 
-       /* Make sure that negotiate contexts start after gss security blob */
+       /*
+        * if an SPNEGO blob is present (i.e. the RFC 2478 GSS info which
+        * indicates which security mechanisms the server supports), make
+        * sure that the negotiate contexts start after it
+        */
        nc_offset = le32_to_cpu(pneg_rsp->NegotiateContextOffset);
-       if (nc_offset < non_ctxlen) {
-               pr_warn_once("Invalid negotiate context offset\n");
+       /*
+        * non_ctxlen is at least shdr->StructureSize + pdu->StructureSize2
+        * and the latter is 1 byte bigger than the fixed-size area of the
+        * NEGOTIATE response
+        */
+       if (nc_offset + 1 < non_ctxlen) {
+               pr_warn_once("Invalid negotiate context offset %d\n", nc_offset);
                return 0;
-       }
-       size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen;
+       } else if (nc_offset + 1 == non_ctxlen) {
+               cifs_dbg(FYI, "no SPNEGO security blob in negprot rsp\n");
+               size_of_pad_before_neg_ctxts = 0;
+       } else if (non_ctxlen == SMB311_NEGPROT_BASE_SIZE)
+               /* has padding, but no SPNEGO blob */
+               size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen + 1;
+       else
+               size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen;
 
        /* Verify that at least minimal negotiate contexts fit within frame */
        if (len < nc_offset + (neg_count * sizeof(struct smb2_neg_context))) {
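
Worked through with concrete numbers (illustrative, assuming a 64-byte
sync header plus a 65-byte fixed NEGOTIATE response body, so non_ctxlen
is 129 when no SPNEGO blob is attached):

	/*
	 * nc_offset = 120 : 121 <  129 -> bogus offset, reject
	 * nc_offset = 128 : 129 == 129 -> contexts start at the Buffer
	 *                   placeholder, pad = 0
	 * nc_offset = 136 : base-size case, pad = 136 - 129 + 1 = 8
	 *                   (the +1 reclaims the Buffer[1] byte counted
	 *                   in the base size but not on the wire)
	 * With a blob present, non_ctxlen grows past 129 and the final
	 * branch applies: pad = nc_offset - non_ctxlen.
	 */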
@@ -859,6 +876,10 @@ smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
  *
  * Assumes @iov does not contain the rfc1002 length and iov[0] has the
  * SMB2 header.
+ *
+ * @ses:       server session structure
+ * @iov:       array containing the SMB request we will send to the server
+ * @nvec:      number of array entries for the iov
  */
 int
 smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec)
index 3d914d7..f192748 100644 (file)
@@ -24,6 +24,7 @@
 #include "smb2glob.h"
 #include "cifs_ioctl.h"
 #include "smbdirect.h"
+#include "fs_context.h"
 
 /* Change credits for different ops and return the total number of credits */
 static int
@@ -99,9 +100,10 @@ smb2_add_credits(struct TCP_Server_Info *server,
        spin_unlock(&server->req_lock);
        wake_up(&server->request_q);
 
-       if (reconnect_detected)
+       if (reconnect_detected) {
                cifs_dbg(FYI, "trying to put %d credits from the old server instance %d\n",
                         add, instance);
+       }
 
        if (server->tcpStatus == CifsNeedReconnect
            || server->tcpStatus == CifsExiting)
@@ -123,7 +125,7 @@ smb2_add_credits(struct TCP_Server_Info *server,
        default:
                trace_smb3_add_credits(server->CurrentMid,
                        server->hostname, rc, add);
-               cifs_dbg(FYI, "add %u credits total=%d\n", add, rc);
+               cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, rc);
        }
 }
 
@@ -135,6 +137,11 @@ smb2_set_credits(struct TCP_Server_Info *server, const int val)
        if (val == 1)
                server->reconnect_instance++;
        spin_unlock(&server->req_lock);
+
+       trace_smb3_set_credits(server->CurrentMid,
+                       server->hostname, val, val);
+       cifs_dbg(FYI, "%s: set %u credits\n", __func__, val);
+
        /* don't log while holding the lock */
        if (val == 1)
                cifs_dbg(FYI, "set credits to 1 due to smb2 reconnect\n");
@@ -201,6 +208,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
                                DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE);
                        credits->instance = server->reconnect_instance;
                        server->credits -= credits->value;
+                       scredits = server->credits;
                        server->in_flight++;
                        if (server->in_flight > server->max_in_flight)
                                server->max_in_flight = server->in_flight;
@@ -208,6 +216,12 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
                }
        }
        spin_unlock(&server->req_lock);
+
+       trace_smb3_add_credits(server->CurrentMid,
+                       server->hostname, scredits, -(credits->value));
+       cifs_dbg(FYI, "%s: removed %u credits total=%d\n",
+                       __func__, credits->value, scredits);
+
        return rc;
 }
 
@@ -217,13 +231,17 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
                    const unsigned int payload_size)
 {
        int new_val = DIV_ROUND_UP(payload_size, SMB2_MAX_BUFFER_SIZE);
+       int scredits;
 
        if (!credits->value || credits->value == new_val)
                return 0;
 
        if (credits->value < new_val) {
-               WARN_ONCE(1, "request has less credits (%d) than required (%d)",
-                         credits->value, new_val);
+               trace_smb3_too_many_credits(server->CurrentMid,
+                               server->hostname, 0, credits->value - new_val);
+               cifs_server_dbg(VFS, "request has fewer credits (%d) than required (%d)",
+                               credits->value, new_val);
+
                return -ENOTSUPP;
        }
 
@@ -231,15 +249,24 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
 
        if (server->reconnect_instance != credits->instance) {
                spin_unlock(&server->req_lock);
+               trace_smb3_reconnect_detected(server->CurrentMid,
+                       server->hostname, 0, 0);
                cifs_server_dbg(VFS, "trying to return %d credits to old session\n",
                         credits->value - new_val);
                return -EAGAIN;
        }
 
        server->credits += credits->value - new_val;
+       scredits = server->credits;
        spin_unlock(&server->req_lock);
        wake_up(&server->request_q);
        credits->value = new_val;
+
+       trace_smb3_add_credits(server->CurrentMid,
+                       server->hostname, scredits, credits->value - new_val);
+       cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n",
+                       __func__, credits->value - new_val, scredits);
+
        return 0;
 }
 
@@ -339,13 +366,13 @@ smb2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 }
 
 static unsigned int
-smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
        struct TCP_Server_Info *server = tcon->ses->server;
        unsigned int wsize;
 
        /* start with specified wsize, or default */
-       wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
+       wsize = ctx->wsize ? ctx->wsize : CIFS_DEFAULT_IOSIZE;
        wsize = min_t(unsigned int, wsize, server->max_write);
        if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
                wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
@@ -354,13 +381,13 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 }
 
 static unsigned int
-smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
        struct TCP_Server_Info *server = tcon->ses->server;
        unsigned int wsize;
 
        /* start with specified wsize, or default */
-       wsize = volume_info->wsize ? volume_info->wsize : SMB3_DEFAULT_IOSIZE;
+       wsize = ctx->wsize ? ctx->wsize : SMB3_DEFAULT_IOSIZE;
        wsize = min_t(unsigned int, wsize, server->max_write);
 #ifdef CONFIG_CIFS_SMB_DIRECT
        if (server->rdma) {
@@ -386,13 +413,13 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 }
 
 static unsigned int
-smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
        struct TCP_Server_Info *server = tcon->ses->server;
        unsigned int rsize;
 
        /* start with specified rsize, or default */
-       rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
+       rsize = ctx->rsize ? ctx->rsize : CIFS_DEFAULT_IOSIZE;
        rsize = min_t(unsigned int, rsize, server->max_read);
 
        if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
@@ -402,13 +429,13 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 }
 
 static unsigned int
-smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
        struct TCP_Server_Info *server = tcon->ses->server;
        unsigned int rsize;
 
        /* start with specified rsize, or default */
-       rsize = volume_info->rsize ? volume_info->rsize : SMB3_DEFAULT_IOSIZE;
+       rsize = ctx->rsize ? ctx->rsize : SMB3_DEFAULT_IOSIZE;
        rsize = min_t(unsigned int, rsize, server->max_read);
 #ifdef CONFIG_CIFS_SMB_DIRECT
        if (server->rdma) {
@@ -477,7 +504,8 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
                goto out;
        }
 
-       if (bytes_left || p->Next)
+       /* Azure rounds the buffer size up by 8, to a 16-byte boundary */
+       if ((bytes_left > 8) || p->Next)
                cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
 
 
@@ -2341,6 +2369,7 @@ static bool
 smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
 {
        struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
+       int scredits;
 
        if (shdr->Status != STATUS_PENDING)
                return false;
@@ -2348,8 +2377,14 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
        if (shdr->CreditRequest) {
                spin_lock(&server->req_lock);
                server->credits += le16_to_cpu(shdr->CreditRequest);
+               scredits = server->credits;
                spin_unlock(&server->req_lock);
                wake_up(&server->request_q);
+
+               trace_smb3_add_credits(server->CurrentMid,
+                               server->hostname, scredits, le16_to_cpu(shdr->CreditRequest));
+               cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n",
+                               __func__, le16_to_cpu(shdr->CreditRequest), scredits);
        }
 
        return true;
@@ -3179,7 +3214,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon,
 
 static struct cifs_ntsd *
 get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb,
-               const struct cifs_fid *cifsfid, u32 *pacllen)
+                   const struct cifs_fid *cifsfid, u32 *pacllen, u32 info)
 {
        struct cifs_ntsd *pntsd = NULL;
        unsigned int xid;
@@ -3193,7 +3228,8 @@ get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb,
        cifs_dbg(FYI, "trying to get acl\n");
 
        rc = SMB2_query_acl(xid, tlink_tcon(tlink), cifsfid->persistent_fid,
-                           cifsfid->volatile_fid, (void **)&pntsd, pacllen);
+                           cifsfid->volatile_fid, (void **)&pntsd, pacllen,
+                           info);
        free_xid(xid);
 
        cifs_put_tlink(tlink);
@@ -3207,7 +3243,7 @@ get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb,
 
 static struct cifs_ntsd *
 get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
-               const char *path, u32 *pacllen)
+                    const char *path, u32 *pacllen, u32 info)
 {
        struct cifs_ntsd *pntsd = NULL;
        u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
@@ -3245,12 +3281,16 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
        oparms.fid = &fid;
        oparms.reconnect = false;
 
+       if (info & SACL_SECINFO)
+               oparms.desired_access |= SYSTEM_SECURITY;
+
        rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL,
                       NULL);
        kfree(utf16_path);
        if (!rc) {
                rc = SMB2_query_acl(xid, tlink_tcon(tlink), fid.persistent_fid,
-                           fid.volatile_fid, (void **)&pntsd, pacllen);
+                                   fid.volatile_fid, (void **)&pntsd, pacllen,
+                                   info);
                SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
        }
 
@@ -3284,10 +3324,12 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
        tcon = tlink_tcon(tlink);
        xid = get_xid();
 
-       if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP)
-               access_flags = WRITE_OWNER;
-       else
-               access_flags = WRITE_DAC;
+       if (aclflag & CIFS_ACL_OWNER || aclflag & CIFS_ACL_GROUP)
+               access_flags |= WRITE_OWNER;
+       if (aclflag & CIFS_ACL_SACL)
+               access_flags |= SYSTEM_SECURITY;
+       if (aclflag & CIFS_ACL_DACL)
+               access_flags |= WRITE_DAC;
 
        utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
        if (!utf16_path) {
@@ -3321,18 +3363,18 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 /* Retrieve an ACL from the server */
 static struct cifs_ntsd *
 get_smb2_acl(struct cifs_sb_info *cifs_sb,
-                                     struct inode *inode, const char *path,
-                                     u32 *pacllen)
+            struct inode *inode, const char *path,
+            u32 *pacllen, u32 info)
 {
        struct cifs_ntsd *pntsd = NULL;
        struct cifsFileInfo *open_file = NULL;
 
-       if (inode)
+       if (inode && !(info & SACL_SECINFO))
                open_file = find_readable_file(CIFS_I(inode), true);
-       if (!open_file)
-               return get_smb2_acl_by_path(cifs_sb, path, pacllen);
+       if (!open_file || (info & SACL_SECINFO))
+               return get_smb2_acl_by_path(cifs_sb, path, pacllen, info);
 
-       pntsd = get_smb2_acl_by_fid(cifs_sb, &open_file->fid, pacllen);
+       pntsd = get_smb2_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info);
        cifsFileInfo_put(open_file);
        return pntsd;
 }
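
A usage sketch: requesting the SACL forces the by-path route above, since
SYSTEM_SECURITY must be asked for at open time and a cached readable
handle will not have it. Variable names are illustrative:

	u32 acllen;
	struct cifs_ntsd *sd;

	sd = get_smb2_acl(cifs_sb, inode, path, &acllen, SACL_SECINFO);
	if (!IS_ERR_OR_NULL(sd))
		kfree(sd); /* caller owns the returned descriptor */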
@@ -3949,7 +3991,7 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
 static unsigned int
 smb2_wp_retry_size(struct inode *inode)
 {
-       return min_t(unsigned int, CIFS_SB(inode->i_sb)->wsize,
+       return min_t(unsigned int, CIFS_SB(inode->i_sb)->ctx->wsize,
                     SMB2_MAX_BUFFER_SIZE);
 }
 
index acb7270..067eb44 100644 (file)
@@ -427,8 +427,8 @@ build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt)
        pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES;
        pneg_ctxt->DataLength = cpu_to_le16(38);
        pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1);
-       pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE);
-       get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE);
+       pneg_ctxt->SaltLength = cpu_to_le16(SMB311_LINUX_CLIENT_SALT_SIZE);
+       get_random_bytes(pneg_ctxt->Salt, SMB311_LINUX_CLIENT_SALT_SIZE);
        pneg_ctxt->HashAlgorithms = SMB2_PREAUTH_INTEGRITY_SHA512;
 }
 
@@ -566,6 +566,9 @@ static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt)
        if (len < MIN_PREAUTH_CTXT_DATA_LEN) {
                pr_warn_once("server sent bad preauth context\n");
                return;
+       } else if (len < MIN_PREAUTH_CTXT_DATA_LEN + le16_to_cpu(ctxt->SaltLength)) {
+               pr_warn_once("server sent invalid SaltLength\n");
+               return;
        }
        if (le16_to_cpu(ctxt->HashAlgorithmCount) != 1)
                pr_warn_once("Invalid SMB3 hash algorithm count\n");
@@ -3476,10 +3479,11 @@ SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 
 int
 SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
-               u64 persistent_fid, u64 volatile_fid,
-               void **data, u32 *plen)
+              u64 persistent_fid, u64 volatile_fid,
+              void **data, u32 *plen, u32 extra_info)
 {
-       __u32 additional_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO;
+       __u32 additional_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
+                               extra_info;
        *plen = 0;
 
        return query_info(xid, tcon, persistent_fid, volatile_fid,
index fa57b03..204a622 100644 (file)
@@ -333,12 +333,20 @@ struct smb2_neg_context {
        /* Followed by array of data */
 } __packed;
 
-#define SMB311_SALT_SIZE                       32
+#define SMB311_LINUX_CLIENT_SALT_SIZE                  32
 /* Hash Algorithm Types */
 #define SMB2_PREAUTH_INTEGRITY_SHA512  cpu_to_le16(0x0001)
 #define SMB2_PREAUTH_HASH_SIZE 64
 
-#define MIN_PREAUTH_CTXT_DATA_LEN      (SMB311_SALT_SIZE + 6)
+/*
+ * The SaltLength that the server sends can be zero, so the only three
+ * required fields (all __le16) add up to six bytes; the minimum context
+ * data len in the response is therefore six bytes, which accounts for
+ *
+ *      HashAlgorithmCount, SaltLength, and 1 HashAlgorithm.
+ */
+#define MIN_PREAUTH_CTXT_DATA_LEN 6
+
 struct smb2_preauth_neg_context {
        __le16  ContextType; /* 1 */
        __le16  DataLength;
@@ -346,7 +354,7 @@ struct smb2_preauth_neg_context {
        __le16  HashAlgorithmCount; /* 1 */
        __le16  SaltLength;
        __le16  HashAlgorithms; /* HashAlgorithms[0] since only one defined */
-       __u8    Salt[SMB311_SALT_SIZE];
+       __u8    Salt[SMB311_LINUX_CLIENT_SALT_SIZE];
 } __packed;
 
 /* Encryption Algorithms Ciphers */
index d411044..9565e27 100644 (file)
@@ -200,8 +200,8 @@ extern int SMB2_query_info_init(struct cifs_tcon *tcon,
                                size_t input_len, void *input);
 extern void SMB2_query_info_free(struct smb_rqst *rqst);
 extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
-                          u64 persistent_file_id, u64 volatile_file_id,
-                          void **data, unsigned int *plen);
+                         u64 persistent_file_id, u64 volatile_file_id,
+                         void **data, unsigned int *plen, u32 info);
 extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
                            u64 persistent_fid, u64 volatile_fid,
                            __le64 *uniqueid);
index b029ed3..10dfe50 100644 (file)
@@ -246,6 +246,7 @@ smbd_qp_async_error_upcall(struct ib_event *event, void *context)
        case IB_EVENT_CQ_ERR:
        case IB_EVENT_QP_FATAL:
                smbd_disconnect_rdma_connection(info);
+               break;
 
        default:
                break;
index 90e0fab..c3d1a58 100644 (file)
@@ -909,8 +909,12 @@ DEFINE_EVENT(smb3_credit_class, smb3_##name,  \
        TP_ARGS(currmid, hostname, credits, credits_to_add))
 
 DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
+DEFINE_SMB3_CREDIT_EVENT(reconnect_detected);
 DEFINE_SMB3_CREDIT_EVENT(credit_timeout);
+DEFINE_SMB3_CREDIT_EVENT(insufficient_credits);
+DEFINE_SMB3_CREDIT_EVENT(too_many_credits);
 DEFINE_SMB3_CREDIT_EVENT(add_credits);
+DEFINE_SMB3_CREDIT_EVENT(set_credits);
 
 #endif /* _CIFS_TRACE_H */
 
index 36b2ece..e9abb41 100644 (file)
@@ -527,6 +527,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
        int *credits;
        int optype;
        long int t;
+       int scredits = server->credits;
 
        if (timeout < 0)
                t = MAX_JIFFY_OFFSET;
@@ -624,12 +625,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
                        /* update # of requests on the wire to server */
                        if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
                                *credits -= num_credits;
+                               scredits = *credits;
                                server->in_flight += num_credits;
                                if (server->in_flight > server->max_in_flight)
                                        server->max_in_flight = server->in_flight;
                                *instance = server->reconnect_instance;
                        }
                        spin_unlock(&server->req_lock);
+
+                       trace_smb3_add_credits(server->CurrentMid,
+                                       server->hostname, scredits, -(num_credits));
+                       cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
+                                       __func__, num_credits, scredits);
                        break;
                }
        }
@@ -649,10 +656,14 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
                          const int flags, unsigned int *instance)
 {
        int *credits;
+       int scredits, sin_flight;
 
        credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);
 
        spin_lock(&server->req_lock);
+       scredits = *credits;
+       sin_flight = server->in_flight;
+
        if (*credits < num) {
                /*
                 * Return immediately if not too many requests in flight since
@@ -660,6 +671,10 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
                 */
                if (server->in_flight < num - *credits) {
                        spin_unlock(&server->req_lock);
+                       trace_smb3_insufficient_credits(server->CurrentMid,
+                                       server->hostname, scredits, sin_flight);
+                       cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
+                                       __func__, sin_flight, num, scredits);
                        return -ENOTSUPP;
                }
        }
diff --git a/fs/cifs/unc.c b/fs/cifs/unc.c
new file mode 100644 (file)
index 0000000..394aa00
--- /dev/null
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *   Copyright (C) 2020, Microsoft Corporation.
+ *
+ *   Author(s): Steve French <stfrench@microsoft.com>
+ *              Suresh Jayaraman <sjayaraman@suse.de>
+ *              Jeff Layton <jlayton@kernel.org>
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/inet.h>
+#include <linux/ctype.h>
+#include "cifsglob.h"
+#include "cifsproto.h"
+
+/* extract the host portion of the UNC string */
+char *extract_hostname(const char *unc)
+{
+       const char *src;
+       char *dst, *delim;
+       unsigned int len;
+
+       /* skip double chars at beginning of string */
+       /* BB: check validity of these bytes? */
+       if (strlen(unc) < 3)
+               return ERR_PTR(-EINVAL);
+       for (src = unc; *src && *src == '\\'; src++)
+               ;
+       if (!*src)
+               return ERR_PTR(-EINVAL);
+
+       /* delimiter between hostname and sharename is always '\\' now */
+       delim = strchr(src, '\\');
+       if (!delim)
+               return ERR_PTR(-EINVAL);
+
+       len = delim - src;
+       dst = kmalloc((len + 1), GFP_KERNEL);
+       if (dst == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(dst, src, len);
+       dst[len] = '\0';
+
+       return dst;
+}
+
+char *extract_sharename(const char *unc)
+{
+       const char *src;
+       char *delim, *dst;
+       int len;
+
+       /* skip double chars at the beginning */
+       src = unc + 2;
+
+       /* share name is always preceded by '\\' now */
+       delim = strchr(src, '\\');
+       if (!delim)
+               return ERR_PTR(-EINVAL);
+       delim++;
+       len = strlen(delim);
+
+       /* caller has to free the memory */
+       dst = kstrndup(delim, len, GFP_KERNEL);
+       if (!dst)
+               return ERR_PTR(-ENOMEM);
+
+       return dst;
+}
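
For reference, a minimal userspace sketch of the same parsing rules; hostname_of() is a hypothetical stand-in for extract_hostname(), which in the kernel returns kmalloc'd memory or an ERR_PTR instead:

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *hostname_of(const char *unc)
{
        const char *src = unc;
        char *delim;

        while (*src == '\\')            /* skip the leading backslashes */
                src++;
        delim = strchr(src, '\\');      /* host ends at the next '\' */
        if (!delim)
                return NULL;
        return strndup(src, delim - src);   /* caller must free() */
}

int main(void)
{
        char *host = hostname_of("\\\\srv1\\share");

        printf("%s\n", host);           /* prints: srv1 */
        free(host);
        return 0;
}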
index b829917..6b658a1 100644 (file)
@@ -34,6 +34,7 @@
 #define MAX_EA_VALUE_SIZE CIFSMaxBufSize
 #define CIFS_XATTR_CIFS_ACL "system.cifs_acl" /* DACL only */
 #define CIFS_XATTR_CIFS_NTSD "system.cifs_ntsd" /* owner plus DACL */
+#define CIFS_XATTR_CIFS_NTSD_FULL "system.cifs_ntsd_full" /* owner/DACL/SACL */
 #define CIFS_XATTR_ATTRIB "cifs.dosattrib"  /* full name: user.cifs.dosattrib */
 #define CIFS_XATTR_CREATETIME "cifs.creationtime"  /* user.cifs.creationtime */
 /*
  */
 #define SMB3_XATTR_CIFS_ACL "system.smb3_acl" /* DACL only */
 #define SMB3_XATTR_CIFS_NTSD "system.smb3_ntsd" /* owner plus DACL */
+#define SMB3_XATTR_CIFS_NTSD_FULL "system.smb3_ntsd_full" /* owner/DACL/SACL */
 #define SMB3_XATTR_ATTRIB "smb3.dosattrib"  /* full name: user.smb3.dosattrib */
 #define SMB3_XATTR_CREATETIME "smb3.creationtime"  /* user.smb3.creationtime */
 /* BB need to add server (Samba e.g) support for security and trusted prefix */
 
 enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT,
-       XATTR_CIFS_NTSD };
+       XATTR_CIFS_NTSD, XATTR_CIFS_NTSD_FULL };
 
 static int cifs_attrib_set(unsigned int xid, struct cifs_tcon *pTcon,
                           struct inode *inode, char *full_path,
@@ -164,7 +166,8 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
                break;
 
        case XATTR_CIFS_ACL:
-       case XATTR_CIFS_NTSD: {
+       case XATTR_CIFS_NTSD:
+       case XATTR_CIFS_NTSD_FULL: {
                struct cifs_ntsd *pacl;
 
                if (!value)
@@ -174,23 +177,27 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
                        rc = -ENOMEM;
                } else {
                        memcpy(pacl, value, size);
-                       if (value &&
-                           pTcon->ses->server->ops->set_acl) {
+                       if (pTcon->ses->server->ops->set_acl) {
+                               int aclflags = 0;
                                rc = 0;
-                               if (handler->flags == XATTR_CIFS_NTSD) {
-                                       /* set owner and DACL */
-                                       rc = pTcon->ses->server->ops->set_acl(
-                                                       pacl, size, inode,
-                                                       full_path,
-                                                       CIFS_ACL_OWNER);
-                               }
-                               if (rc == 0) {
-                                       /* set DACL */
-                                       rc = pTcon->ses->server->ops->set_acl(
-                                                       pacl, size, inode,
-                                                       full_path,
-                                                       CIFS_ACL_DACL);
+
+                               switch (handler->flags) {
+                               case XATTR_CIFS_NTSD_FULL:
+                                       aclflags = (CIFS_ACL_OWNER |
+                                                   CIFS_ACL_DACL |
+                                                   CIFS_ACL_SACL);
+                                       break;
+                               case XATTR_CIFS_NTSD:
+                                       aclflags = (CIFS_ACL_OWNER |
+                                                   CIFS_ACL_DACL);
+                                       break;
+                               case XATTR_CIFS_ACL:
+                               default:
+                                       aclflags = CIFS_ACL_DACL;
                                }
+
+                               rc = pTcon->ses->server->ops->set_acl(pacl,
+                                       size, inode, full_path, aclflags);
                        } else {
                                rc = -EOPNOTSUPP;
                        }
@@ -327,16 +334,25 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
                break;
 
        case XATTR_CIFS_ACL:
-       case XATTR_CIFS_NTSD: {
-               /* the whole ntsd is fetched regardless */
-               u32 acllen;
+       case XATTR_CIFS_NTSD:
+       case XATTR_CIFS_NTSD_FULL: {
+               /*
+                * fetch owner, DACL, and SACL if asked for full descriptor,
+                * fetch owner and DACL otherwise
+                */
+               u32 acllen, extra_info;
                struct cifs_ntsd *pacl;
 
                if (pTcon->ses->server->ops->get_acl == NULL)
                        goto out; /* rc already EOPNOTSUPP */
 
+               if (handler->flags == XATTR_CIFS_NTSD_FULL) {
+                       extra_info = SACL_SECINFO;
+               } else {
+                       extra_info = 0;
+               }
                pacl = pTcon->ses->server->ops->get_acl(cifs_sb,
-                               inode, full_path, &acllen);
+                               inode, full_path, &acllen, extra_info);
                if (IS_ERR(pacl)) {
                        rc = PTR_ERR(pacl);
                        cifs_dbg(VFS, "%s: error %zd getting sec desc\n",
@@ -486,6 +502,27 @@ static const struct xattr_handler smb3_ntsd_xattr_handler = {
        .set = cifs_xattr_set,
 };
 
+static const struct xattr_handler cifs_cifs_ntsd_full_xattr_handler = {
+       .name = CIFS_XATTR_CIFS_NTSD_FULL,
+       .flags = XATTR_CIFS_NTSD_FULL,
+       .get = cifs_xattr_get,
+       .set = cifs_xattr_set,
+};
+
+/*
+ * Although this is just an alias for the above, we need to move away
+ * from the confusing, 20-year-old term 'cifs': the original dialect is
+ * no longer secure and was replaced by SMB2/SMB3 long ago, while SMB3
+ * and later are highly secure.
+ */
+static const struct xattr_handler smb3_ntsd_full_xattr_handler = {
+       .name = SMB3_XATTR_CIFS_NTSD_FULL,
+       .flags = XATTR_CIFS_NTSD_FULL,
+       .get = cifs_xattr_get,
+       .set = cifs_xattr_set,
+};
+
+
 static const struct xattr_handler cifs_posix_acl_access_xattr_handler = {
        .name = XATTR_NAME_POSIX_ACL_ACCESS,
        .flags = XATTR_ACL_ACCESS,
@@ -507,6 +544,8 @@ const struct xattr_handler *cifs_xattr_handlers[] = {
        &smb3_acl_xattr_handler, /* alias for above since avoiding "cifs" */
        &cifs_cifs_ntsd_xattr_handler,
        &smb3_ntsd_xattr_handler, /* alias for above since avoiding "cifs" */
+       &cifs_cifs_ntsd_full_xattr_handler,
+       &smb3_ntsd_full_xattr_handler, /* alias for above since avoiding "cifs" */
        &cifs_posix_acl_access_xattr_handler,
        &cifs_posix_acl_default_xattr_handler,
        NULL
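
A hedged userspace sketch of the new alias in use; the path is arbitrary, the buffer size is a guess, and fetching the SACL additionally requires SYSTEM_SECURITY access (SeSecurityPrivilege) on the server side:

#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
        char buf[4096];
        ssize_t len;

        if (argc < 2)
                return 1;
        /* owner + DACL + SACL via the alias registered above */
        len = getxattr(argv[1], "system.smb3_ntsd_full", buf, sizeof(buf));
        if (len < 0) {
                perror("getxattr");
                return 1;
        }
        printf("security descriptor: %zd bytes\n", len);
        return 0;
}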
index b0983e2..b839dd1 100644 (file)
@@ -267,6 +267,7 @@ static void configfs_remove_dirent(struct dentry *dentry)
  *     configfs_create_dir - create a directory for a config_item.
  *     @item:          config_item we're creating directory for.
  *     @dentry:        config_item's dentry.
+ *     @frag:          config_item's fragment.
  *
  *     Note: user-created entries won't be allowed under this new directory
  *     until it is validated by configfs_dir_set_ready()
index df466ef..e265b6d 100644 (file)
@@ -182,11 +182,14 @@ static __poll_t eventfd_poll(struct file *file, poll_table *wait)
        return events;
 }
 
-static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
 {
+       lockdep_assert_held(&ctx->wqh.lock);
+
        *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
        ctx->count -= *cnt;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
 
 /**
  * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
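
A userspace illustration of the semantics this helper encodes (a sketch; read(2) on an eventfd reaches the same logic): with EFD_SEMAPHORE every read returns 1 and decrements the counter, otherwise a read returns and clears the whole counter.

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
        int efd = eventfd(3, EFD_SEMAPHORE);
        uint64_t cnt;

        read(efd, &cnt, sizeof(cnt));   /* cnt == 1, counter drops to 2 */
        printf("got %llu\n", (unsigned long long)cnt);
        close(efd);
        return 0;
}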
index 10b81e6..a829af0 100644 (file)
@@ -389,19 +389,24 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time)
  *
  * we must do our busy polling with irqs enabled
  */
-static void ep_busy_loop(struct eventpoll *ep, int nonblock)
+static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
 {
        unsigned int napi_id = READ_ONCE(ep->napi_id);
 
-       if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
+       if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) {
                napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false,
                               BUSY_POLL_BUDGET);
-}
-
-static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
-{
-       if (ep->napi_id)
+               if (ep_events_available(ep))
+                       return true;
+               /*
+                * Busy poll timed out.  Drop NAPI ID for now, we can add
+                * it back in when we have moved a socket with a valid NAPI
+                * ID onto the ready list.
+                */
                ep->napi_id = 0;
+               return false;
+       }
+       return false;
 }
 
 /*
@@ -441,12 +446,9 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
 
 #else
 
-static inline void ep_busy_loop(struct eventpoll *ep, int nonblock)
-{
-}
-
-static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
+static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
 {
+       return false;
 }
 
 static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
@@ -1625,6 +1627,14 @@ static int ep_send_events(struct eventpoll *ep,
        poll_table pt;
        int res = 0;
 
+       /*
+        * Always short-circuit for fatal signals to allow threads to make a
+        * timely exit without the chance of finding more events available and
+        * fetching repeatedly.
+        */
+       if (fatal_signal_pending(current))
+               return -EINTR;
+
        init_poll_funcptr(&pt, NULL);
 
        mutex_lock(&ep->mtx);
@@ -1702,15 +1712,25 @@ static int ep_send_events(struct eventpoll *ep,
        return res;
 }
 
-static inline struct timespec64 ep_set_mstimeout(long ms)
+static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms)
 {
-       struct timespec64 now, ts = {
-               .tv_sec = ms / MSEC_PER_SEC,
-               .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
-       };
+       struct timespec64 now;
+
+       if (ms < 0)
+               return NULL;
+
+       if (!ms) {
+               to->tv_sec = 0;
+               to->tv_nsec = 0;
+               return to;
+       }
+
+       to->tv_sec = ms / MSEC_PER_SEC;
+       to->tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC);
 
        ktime_get_ts64(&now);
-       return timespec64_add_safe(now, ts);
+       *to = timespec64_add_safe(now, *to);
+       return to;
 }
 
 /**
@@ -1722,8 +1742,8 @@ static inline struct timespec64 ep_set_mstimeout(long ms)
  *          stored.
  * @maxevents: Size (in terms of number of events) of the caller event buffer.
  * @timeout: Maximum timeout for the ready events fetch operation, in
- *           milliseconds. If the @timeout is zero, the function will not block,
- *           while if the @timeout is less than zero, the function will block
+ *           a timespec. If the timeout is zero, the function will not block,
+ *           while if the @timeout ptr is NULL, the function will block
  *           until at least one event has been retrieved (or an error
  *           occurred).
  *
@@ -1731,55 +1751,59 @@ static inline struct timespec64 ep_set_mstimeout(long ms)
  *          error code, in case of error.
  */
 static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
-                  int maxevents, long timeout)
+                  int maxevents, struct timespec64 *timeout)
 {
-       int res = 0, eavail, timed_out = 0;
+       int res, eavail, timed_out = 0;
        u64 slack = 0;
        wait_queue_entry_t wait;
        ktime_t expires, *to = NULL;
 
        lockdep_assert_irqs_enabled();
 
-       if (timeout > 0) {
-               struct timespec64 end_time = ep_set_mstimeout(timeout);
-
-               slack = select_estimate_accuracy(&end_time);
+       if (timeout && (timeout->tv_sec | timeout->tv_nsec)) {
+               slack = select_estimate_accuracy(timeout);
                to = &expires;
-               *to = timespec64_to_ktime(end_time);
-       } else if (timeout == 0) {
+               *to = timespec64_to_ktime(*timeout);
+       } else if (timeout) {
                /*
                 * Avoid the unnecessary trip to the wait queue loop, if the
-                * caller specified a non blocking operation. We still need
-                * lock because we could race and not see an epi being added
-                * to the ready list while in irq callback. Thus incorrectly
-                * returning 0 back to userspace.
+                * caller specified a non blocking operation.
                 */
                timed_out = 1;
-
-               write_lock_irq(&ep->lock);
-               eavail = ep_events_available(ep);
-               write_unlock_irq(&ep->lock);
-
-               goto send_events;
        }
 
-fetch_events:
+       /*
+        * This call is racy: we may or may not see events that are being added
+        * to the ready list under the lock (e.g., in IRQ callbacks). For cases
+        * with a non-zero timeout, this thread will check the ready list under
+        * lock and will be added to the wait queue.  For cases with a zero
+        * timeout, the user by definition should not care and will have to
+        * recheck again.
+        */
+       eavail = ep_events_available(ep);
+
+       while (1) {
+               if (eavail) {
+                       /*
+                        * Try to transfer events to user space. In case we get
+                        * 0 events and there's still timeout left over, we go
+                        * trying again in search of more luck.
+                        */
+                       res = ep_send_events(ep, events, maxevents);
+                       if (res)
+                               return res;
+               }
 
-       if (!ep_events_available(ep))
-               ep_busy_loop(ep, timed_out);
+               if (timed_out)
+                       return 0;
 
-       eavail = ep_events_available(ep);
-       if (eavail)
-               goto send_events;
+               eavail = ep_busy_loop(ep, timed_out);
+               if (eavail)
+                       continue;
 
-       /*
-        * Busy poll timed out.  Drop NAPI ID for now, we can add
-        * it back in when we have moved a socket with a valid NAPI
-        * ID onto the ready list.
-        */
-       ep_reset_busy_poll_napi_id(ep);
+               if (signal_pending(current))
+                       return -EINTR;
 
-       do {
                /*
                 * Internally init_wait() uses autoremove_wake_function(),
                 * thus wait entry is removed from the wait queue on each
@@ -1809,55 +1833,38 @@ fetch_events:
                 * important.
                 */
                eavail = ep_events_available(ep);
-               if (!eavail) {
-                       if (signal_pending(current))
-                               res = -EINTR;
-                       else
-                               __add_wait_queue_exclusive(&ep->wq, &wait);
-               }
-               write_unlock_irq(&ep->lock);
-
-               if (eavail || res)
-                       break;
-
-               if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
-                       timed_out = 1;
-                       break;
-               }
-
-               /* We were woken up, thus go and try to harvest some events */
-               eavail = 1;
-
-       } while (0);
+               if (!eavail)
+                       __add_wait_queue_exclusive(&ep->wq, &wait);
 
-       __set_current_state(TASK_RUNNING);
-
-       if (!list_empty_careful(&wait.entry)) {
-               write_lock_irq(&ep->lock);
-               __remove_wait_queue(&ep->wq, &wait);
                write_unlock_irq(&ep->lock);
-       }
 
-send_events:
-       if (fatal_signal_pending(current)) {
+               if (!eavail)
+                       timed_out = !schedule_hrtimeout_range(to, slack,
+                                                             HRTIMER_MODE_ABS);
+               __set_current_state(TASK_RUNNING);
+
                /*
-                * Always short-circuit for fatal signals to allow
-                * threads to make a timely exit without the chance of
-                * finding more events available and fetching
-                * repeatedly.
+                * We were woken up, thus go and try to harvest some events.
+                * If timed out and still on the wait queue, recheck eavail
+                * carefully under lock, below.
                 */
-               res = -EINTR;
-       }
-       /*
-        * Try to transfer events to user space. In case we get 0 events and
-        * there's still timeout left over, we go trying again in search of
-        * more luck.
-        */
-       if (!res && eavail &&
-           !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
-               goto fetch_events;
+               eavail = 1;
 
-       return res;
+               if (!list_empty_careful(&wait.entry)) {
+                       write_lock_irq(&ep->lock);
+                       /*
+                        * If the thread timed out and is not on the wait queue,
+                        * it means that the thread was woken up after its
+                        * timeout expired before it could reacquire the lock.
+                        * Thus, when wait.entry is empty, it needs to harvest
+                        * events.
+                        */
+                       if (timed_out)
+                               eavail = list_empty(&wait.entry);
+                       __remove_wait_queue(&ep->wq, &wait);
+                       write_unlock_irq(&ep->lock);
+               }
+       }
 }
 
 /**
@@ -2176,7 +2183,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
  * part of the user space epoll_wait(2).
  */
 static int do_epoll_wait(int epfd, struct epoll_event __user *events,
-                        int maxevents, int timeout)
+                        int maxevents, struct timespec64 *to)
 {
        int error;
        struct fd f;
@@ -2210,7 +2217,7 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
        ep = f.file->private_data;
 
        /* Time to fish for events ... */
-       error = ep_poll(ep, events, maxevents, timeout);
+       error = ep_poll(ep, events, maxevents, to);
 
 error_fput:
        fdput(f);
@@ -2220,16 +2227,19 @@ error_fput:
 SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
                int, maxevents, int, timeout)
 {
-       return do_epoll_wait(epfd, events, maxevents, timeout);
+       struct timespec64 to;
+
+       return do_epoll_wait(epfd, events, maxevents,
+                            ep_timeout_to_timespec(&to, timeout));
 }
 
 /*
  * Implement the event wait interface for the eventpoll file. It is the kernel
  * part of the user space epoll_pwait(2).
  */
-SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
-               int, maxevents, int, timeout, const sigset_t __user *, sigmask,
-               size_t, sigsetsize)
+static int do_epoll_pwait(int epfd, struct epoll_event __user *events,
+                         int maxevents, struct timespec64 *to,
+                         const sigset_t __user *sigmask, size_t sigsetsize)
 {
        int error;
 
@@ -2241,18 +2251,47 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
        if (error)
                return error;
 
-       error = do_epoll_wait(epfd, events, maxevents, timeout);
+       error = do_epoll_wait(epfd, events, maxevents, to);
+
        restore_saved_sigmask_unless(error == -EINTR);
 
        return error;
 }
 
+SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
+               int, maxevents, int, timeout, const sigset_t __user *, sigmask,
+               size_t, sigsetsize)
+{
+       struct timespec64 to;
+
+       return do_epoll_pwait(epfd, events, maxevents,
+                             ep_timeout_to_timespec(&to, timeout),
+                             sigmask, sigsetsize);
+}
+
+SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events,
+               int, maxevents, const struct __kernel_timespec __user *, timeout,
+               const sigset_t __user *, sigmask, size_t, sigsetsize)
+{
+       struct timespec64 ts, *to = NULL;
+
+       if (timeout) {
+               if (get_timespec64(&ts, timeout))
+                       return -EFAULT;
+               to = &ts;
+               if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+                       return -EINVAL;
+       }
+
+       return do_epoll_pwait(epfd, events, maxevents, to,
+                             sigmask, sigsetsize);
+}
+
 #ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
-                       struct epoll_event __user *, events,
-                       int, maxevents, int, timeout,
-                       const compat_sigset_t __user *, sigmask,
-                       compat_size_t, sigsetsize)
+static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events,
+                                int maxevents, struct timespec64 *timeout,
+                                const compat_sigset_t __user *sigmask,
+                                compat_size_t sigsetsize)
 {
        long err;
 
@@ -2265,10 +2304,46 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
                return err;
 
        err = do_epoll_wait(epfd, events, maxevents, timeout);
+
        restore_saved_sigmask_unless(err == -EINTR);
 
        return err;
 }
+
+COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
+                      struct epoll_event __user *, events,
+                      int, maxevents, int, timeout,
+                      const compat_sigset_t __user *, sigmask,
+                      compat_size_t, sigsetsize)
+{
+       struct timespec64 to;
+
+       return do_compat_epoll_pwait(epfd, events, maxevents,
+                                    ep_timeout_to_timespec(&to, timeout),
+                                    sigmask, sigsetsize);
+}
+
+COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd,
+                      struct epoll_event __user *, events,
+                      int, maxevents,
+                      const struct __kernel_timespec __user *, timeout,
+                      const compat_sigset_t __user *, sigmask,
+                      compat_size_t, sigsetsize)
+{
+       struct timespec64 ts, *to = NULL;
+
+       if (timeout) {
+               if (get_timespec64(&ts, timeout))
+                       return -EFAULT;
+               to = &ts;
+               if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+                       return -EINVAL;
+       }
+
+       return do_compat_epoll_pwait(epfd, events, maxevents, to,
+                                    sigmask, sigsetsize);
+}
+
 #endif
 
 static int __init eventpoll_init(void)
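
The new syscall from the userspace side, as a sketch: it assumes a 5.11+ kernel, that __NR_epoll_pwait2 is present in the installed headers, and it uses raw syscall(2) because a glibc wrapper may not exist yet.

#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/syscall.h>

int main(void)
{
        int epfd = epoll_create1(0);
        struct epoll_event ev;
        /* 1.5 ms: a timeout only expressible with nanosecond resolution */
        struct timespec to = { .tv_sec = 0, .tv_nsec = 1500000 };
        int n;

        n = syscall(__NR_epoll_pwait2, epfd, &ev, 1, &to, NULL, 0);
        printf("epoll_pwait2 returned %d\n", n);    /* 0 == timed out */
        close(epfd);
        return 0;
}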
index 675d0e7..314d540 100644 (file)
@@ -659,7 +659,7 @@ static int exfat_load_upcase_table(struct super_block *sb,
        unsigned char skip = false;
        unsigned short *upcase_table;
 
-       upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL);
+       upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL);
        if (!upcase_table)
                return -ENOMEM;
 
@@ -715,7 +715,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb)
        unsigned short uni = 0, *upcase_table;
        unsigned int index = 0;
 
-       upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL);
+       upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL);
        if (!upcase_table)
                return -ENOMEM;
 
@@ -803,5 +803,5 @@ load_default:
 
 void exfat_free_upcase_table(struct exfat_sb_info *sbi)
 {
-       kfree(sbi->vol_utbl);
+       kvfree(sbi->vol_utbl);
 }
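
The pattern behind this change, as a hedged kernel-style sketch: kvcalloc() can fall back to vmalloc-backed memory when a large physically contiguous allocation would fail, and that memory must be released with kvfree(), never kfree().

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>

static unsigned short *table;

static int table_alloc(size_t count)
{
        /* kmalloc- or vmalloc-backed, depending on size and fragmentation */
        table = kvcalloc(count, sizeof(*table), GFP_KERNEL);
        return table ? 0 : -ENOMEM;
}

static void table_free(void)
{
        kvfree(table);          /* correct for either backing store */
        table = NULL;
}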
index 8434e0a..c0b6096 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -694,8 +694,10 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
                 * If the requested range is greater than the current maximum,
                 * we're closing everything so only copy all file descriptors
                 * beneath the lowest file descriptor.
+                * If the caller requested all fds to be made cloexec copy all
+                * of the file descriptors since they still want to use them.
                 */
-               if (max_fd >= cur_max)
+               if (!(flags & CLOSE_RANGE_CLOEXEC) && (max_fd >= cur_max))
                        max_unshare_fds = fd;
 
                ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
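
What the flag enables from userspace, sketched under the assumption of 5.11+ UAPI headers (CLOSE_RANGE_CLOEXEC) and a raw syscall, since libc wrappers may lag: every fd at or above 3 is marked close-on-exec in a single call instead of being closed outright.

#define _GNU_SOURCE
#include <linux/close_range.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        /* fds 0-2 survive exec normally; all others become O_CLOEXEC */
        if (syscall(__NR_close_range, 3, ~0U, CLOSE_RANGE_CLOEXEC) < 0)
                return 1;
        return 0;
}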
index 35a6fd1..d87a5bc 100644 (file)
@@ -857,12 +857,6 @@ static void delete_work_func(struct work_struct *work)
        clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
        spin_unlock(&gl->gl_lockref.lock);
 
-       /* If someone's using this glock to create a new dinode, the block must
-          have been freed by another node, then re-used, in which case our
-          iopen callback is too late after the fact. Ignore it. */
-       if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
-               goto out;
-
        if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
                /*
                 * If we can evict the inode, give the remote node trying to
@@ -2112,8 +2106,6 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
                *p++ = 'o';
        if (test_bit(GLF_BLOCKING, gflags))
                *p++ = 'b';
-       if (test_bit(GLF_INODE_CREATING, gflags))
-               *p++ = 'c';
        if (test_bit(GLF_PENDING_DELETE, gflags))
                *p++ = 'P';
        if (test_bit(GLF_FREEING, gflags))
index f8858d9..8e1ab8e 100644 (file)
@@ -348,7 +348,6 @@ enum {
        GLF_LRU                         = 13,
        GLF_OBJECT                      = 14, /* Used only for tracing */
        GLF_BLOCKING                    = 15,
-       GLF_INODE_CREATING              = 16, /* Inode creation occurring */
        GLF_PENDING_DELETE              = 17,
        GLF_FREEING                     = 18, /* Wait for glock to be freed */
 };
index 65ae4fc..c1b77e8 100644 (file)
 #include "super.h"
 #include "glops.h"
 
+static const struct inode_operations gfs2_file_iops;
+static const struct inode_operations gfs2_dir_iops;
+static const struct inode_operations gfs2_symlink_iops;
+
 static int iget_test(struct inode *inode, void *opaque)
 {
        u64 no_addr = *(u64 *)opaque;
@@ -605,7 +609,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        struct inode *inode = NULL;
        struct gfs2_inode *dip = GFS2_I(dir), *ip;
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-       struct gfs2_glock *io_gl = NULL;
+       struct gfs2_glock *io_gl;
        int error, free_vfs_inode = 1;
        u32 aflags = 0;
        unsigned blocks = 1;
@@ -746,8 +750,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        init_dinode(dip, ip, symname);
        gfs2_trans_end(sdp);
 
-       BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags));
-
        error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
        if (error)
                goto fail_gunlock2;
@@ -793,7 +795,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        gfs2_glock_dq_uninit(ghs);
        gfs2_qa_put(ip);
        gfs2_glock_dq_uninit(ghs + 1);
-       clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
        gfs2_glock_put(io_gl);
        gfs2_qa_put(dip);
        return error;
@@ -802,7 +803,6 @@ fail_gunlock3:
        glock_clear_object(io_gl, ip);
        gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 fail_gunlock2:
-       clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
        glock_clear_object(io_gl, ip);
        gfs2_glock_put(io_gl);
 fail_free_inode:
@@ -2136,7 +2136,7 @@ static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
        return generic_update_time(inode, time, flags);
 }
 
-const struct inode_operations gfs2_file_iops = {
+static const struct inode_operations gfs2_file_iops = {
        .permission = gfs2_permission,
        .setattr = gfs2_setattr,
        .getattr = gfs2_getattr,
@@ -2147,7 +2147,7 @@ const struct inode_operations gfs2_file_iops = {
        .update_time = gfs2_update_time,
 };
 
-const struct inode_operations gfs2_dir_iops = {
+static const struct inode_operations gfs2_dir_iops = {
        .create = gfs2_create,
        .lookup = gfs2_lookup,
        .link = gfs2_link,
@@ -2168,7 +2168,7 @@ const struct inode_operations gfs2_dir_iops = {
        .atomic_open = gfs2_atomic_open,
 };
 
-const struct inode_operations gfs2_symlink_iops = {
+static const struct inode_operations gfs2_symlink_iops = {
        .get_link = gfs2_get_link,
        .permission = gfs2_permission,
        .setattr = gfs2_setattr,
index b52ecf4..8073b8d 100644 (file)
@@ -107,9 +107,6 @@ extern int gfs2_open_common(struct inode *inode, struct file *file);
 extern loff_t gfs2_seek_data(struct file *file, loff_t offset);
 extern loff_t gfs2_seek_hole(struct file *file, loff_t offset);
 
-extern const struct inode_operations gfs2_file_iops;
-extern const struct inode_operations gfs2_dir_iops;
-extern const struct inode_operations gfs2_symlink_iops;
 extern const struct file_operations gfs2_file_fops_nolock;
 extern const struct file_operations gfs2_dir_fops_nolock;
 
index b3d951a..2f56acc 100644 (file)
@@ -353,7 +353,6 @@ int gfs2_statfs_sync(struct super_block *sb, int type)
        struct buffer_head *m_bh, *l_bh;
        int error;
 
-       sb_start_write(sb);
        error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
                                   &gh);
        if (error)
@@ -392,7 +391,6 @@ out_bh:
 out_unlock:
        gfs2_glock_dq_uninit(&gh);
 out:
-       sb_end_write(sb);
        return error;
 }
 
index 0fba3bf..a374397 100644 (file)
@@ -137,7 +137,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
        gfs2_glock_dq(&sdp->sd_jinode_gh);
        if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
                /* Make sure gfs2_unfreeze works if partially-frozen */
-               flush_workqueue(gfs2_freeze_wq);
+               flush_work(&sdp->sd_freeze_work);
                atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
                thaw_super(sdp->sd_vfs);
        } else {
index d756298..a4443dd 100644 (file)
@@ -151,7 +151,7 @@ extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
                               bool verbose);
 
 #define gfs2_io_error(sdp) \
-gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
+gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__)
 
 
 void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -159,10 +159,10 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
                        bool withdraw);
 
 #define gfs2_io_error_bh_wd(sdp, bh) \
-gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true);
+gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true)
 
 #define gfs2_io_error_bh(sdp, bh) \
-gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false);
+gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false)
 
 
 extern struct kmem_cache *gfs2_glock_cachep;
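
Why the trailing semicolons are dropped, shown with a hypothetical macro pair: a semicolon baked into the macro body expands into an extra empty statement, which closes an if branch early and leaves the following else orphaned.

#include <stdio.h>

#define log_evt()       printf("event\n")   /* caller supplies the ';' */
/*
 * Had the body ended in ';', "if (x) log_evt(); else ..." would expand
 * to "if (x) printf("event\n");; else ..." - the second ';' terminates
 * the if statement, so the else no longer parses.
 */

int main(int argc, char **argv)
{
        if (argc > 1)
                log_evt();
        else
                printf("no event\n");
        return 0;
}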
index 4fd9be4..40e203b 100644 (file)
@@ -13,6 +13,7 @@
 #ifndef _JFFS2_DEBUG_H_
 #define _JFFS2_DEBUG_H_
 
+#include <linux/printk.h>
 #include <linux/sched.h>
 
 #ifndef CONFIG_JFFS2_FS_DEBUG
@@ -99,73 +100,73 @@ do {                                               \
 #ifdef JFFS2_DBG_READINODE_MESSAGES
 #define dbg_readinode(fmt, ...)        JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_readinode(fmt, ...)
+#define dbg_readinode(fmt, ...)        no_printk(fmt, ##__VA_ARGS__)
 #endif
 #ifdef JFFS2_DBG_READINODE2_MESSAGES
 #define dbg_readinode2(fmt, ...)       JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_readinode2(fmt, ...)
+#define dbg_readinode2(fmt, ...)       no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Fragtree build debugging messages */
 #ifdef JFFS2_DBG_FRAGTREE_MESSAGES
 #define dbg_fragtree(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_fragtree(fmt, ...)
+#define dbg_fragtree(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 #ifdef JFFS2_DBG_FRAGTREE2_MESSAGES
 #define dbg_fragtree2(fmt, ...)        JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_fragtree2(fmt, ...)
+#define dbg_fragtree2(fmt, ...)        no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Directory entry list manipulation debugging messages */
 #ifdef JFFS2_DBG_DENTLIST_MESSAGES
 #define dbg_dentlist(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_dentlist(fmt, ...)
+#define dbg_dentlist(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Print the messages about manipulating node_refs */
 #ifdef JFFS2_DBG_NODEREF_MESSAGES
 #define dbg_noderef(fmt, ...)  JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_noderef(fmt, ...)
+#define dbg_noderef(fmt, ...)  no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Manipulations with the list of inodes (JFFS2 inocache) */
 #ifdef JFFS2_DBG_INOCACHE_MESSAGES
 #define dbg_inocache(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_inocache(fmt, ...)
+#define dbg_inocache(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Summary debugging messages */
 #ifdef JFFS2_DBG_SUMMARY_MESSAGES
 #define dbg_summary(fmt, ...)  JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_summary(fmt, ...)
+#define dbg_summary(fmt, ...)  no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* File system build messages */
 #ifdef JFFS2_DBG_FSBUILD_MESSAGES
 #define dbg_fsbuild(fmt, ...)  JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_fsbuild(fmt, ...)
+#define dbg_fsbuild(fmt, ...)  no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Watch the object allocations */
 #ifdef JFFS2_DBG_MEMALLOC_MESSAGES
 #define dbg_memalloc(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_memalloc(fmt, ...)
+#define dbg_memalloc(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Watch the XATTR subsystem */
 #ifdef JFFS2_DBG_XATTR_MESSAGES
 #define dbg_xattr(fmt, ...)  JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_xattr(fmt, ...)
+#define dbg_xattr(fmt, ...)  no_printk(fmt, ##__VA_ARGS__)
 #endif 
 
 /* "Sanity" checks */
index 778275f..5a70917 100644 (file)
@@ -38,6 +38,7 @@ struct jffs2_mount_opts {
         * users. This is implemented simply by means of not allowing the
         * latter users to write to the file system if the amount of
         * available space is less than 'rp_size'. */
+       bool set_rp_size;
        unsigned int rp_size;
 };
 
index 8ff4d1a..2e98fa2 100644 (file)
@@ -349,14 +349,14 @@ static inline struct jffs2_node_frag *frag_last(struct rb_root *root)
 #define frag_parent(frag) rb_entry(rb_parent(&(frag)->rb), struct jffs2_node_frag, rb)
 #define frag_left(frag) rb_entry((frag)->rb.rb_left, struct jffs2_node_frag, rb)
 #define frag_right(frag) rb_entry((frag)->rb.rb_right, struct jffs2_node_frag, rb)
-#define frag_erase(frag, list) rb_erase(&frag->rb, list);
+#define frag_erase(frag, list) rb_erase(&frag->rb, list)
 
 #define tn_next(tn) rb_entry(rb_next(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
 #define tn_prev(tn) rb_entry(rb_prev(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
 #define tn_parent(tn) rb_entry(rb_parent(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
 #define tn_left(tn) rb_entry((tn)->rb.rb_left, struct jffs2_tmp_dnode_info, rb)
 #define tn_right(tn) rb_entry((tn)->rb.rb_right, struct jffs2_tmp_dnode_info, rb)
-#define tn_erase(tn, list) rb_erase(&tn->rb, list);
+#define tn_erase(tn, list) rb_erase(&tn->rb, list)
 #define tn_last(list) rb_entry(rb_last(list), struct jffs2_tmp_dnode_info, rb)
 #define tn_first(list) rb_entry(rb_first(list), struct jffs2_tmp_dnode_info, rb)
 
index 2f6f0b1..03b4f99 100644 (file)
@@ -672,6 +672,22 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
                        jffs2_free_full_dirent(fd);
                        return -EIO;
                }
+
+#ifdef CONFIG_JFFS2_SUMMARY
+               /*
+                * only check the name CRC under CONFIG_JFFS2_SUMMARY:
+                * without summary support it has already been checked
+                * while mounting
+                */
+               crc = crc32(0, fd->name, rd->nsize);
+               if (unlikely(crc != je32_to_cpu(rd->name_crc))) {
+                       JFFS2_NOTICE("name CRC failed on dirent node at "
+                          "%#08x: read %#08x, calculated %#08x\n",
+                          ref_offset(ref), je32_to_cpu(rd->name_crc), crc);
+                       jffs2_mark_node_obsolete(c, ref);
+                       jffs2_free_full_dirent(fd);
+                       return 0;
+               }
+#endif
        }
 
        fd->nhash = full_name_hash(NULL, fd->name, rd->nsize);
index 05d7878..81ca58c 100644 (file)
@@ -88,7 +88,7 @@ static int jffs2_show_options(struct seq_file *s, struct dentry *root)
 
        if (opts->override_compr)
                seq_printf(s, ",compr=%s", jffs2_compr_name(opts->compr));
-       if (opts->rp_size)
+       if (opts->set_rp_size)
                seq_printf(s, ",rp_size=%u", opts->rp_size / 1024);
 
        return 0;
@@ -202,11 +202,8 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
        case Opt_rp_size:
                if (result.uint_32 > UINT_MAX / 1024)
                        return invalf(fc, "jffs2: rp_size unrepresentable");
-               opt = result.uint_32 * 1024;
-               if (opt > c->mtd->size)
-                       return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB",
-                                     c->mtd->size / 1024);
-               c->mount_opts.rp_size = opt;
+               c->mount_opts.rp_size = result.uint_32 * 1024;
+               c->mount_opts.set_rp_size = true;
                break;
        default:
                return -EINVAL;
@@ -215,11 +212,30 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
        return 0;
 }
 
+static inline void jffs2_update_mount_opts(struct fs_context *fc)
+{
+       struct jffs2_sb_info *new_c = fc->s_fs_info;
+       struct jffs2_sb_info *c = JFFS2_SB_INFO(fc->root->d_sb);
+
+       mutex_lock(&c->alloc_sem);
+       if (new_c->mount_opts.override_compr) {
+               c->mount_opts.override_compr = new_c->mount_opts.override_compr;
+               c->mount_opts.compr = new_c->mount_opts.compr;
+       }
+       if (new_c->mount_opts.set_rp_size) {
+               c->mount_opts.set_rp_size = new_c->mount_opts.set_rp_size;
+               c->mount_opts.rp_size = new_c->mount_opts.rp_size;
+       }
+       mutex_unlock(&c->alloc_sem);
+}
+
 static int jffs2_reconfigure(struct fs_context *fc)
 {
        struct super_block *sb = fc->root->d_sb;
 
        sync_filesystem(sb);
+       jffs2_update_mount_opts(fc);
+
        return jffs2_do_remount_fs(sb, fc);
 }
 
@@ -249,6 +265,10 @@ static int jffs2_fill_super(struct super_block *sb, struct fs_context *fc)
        c->mtd = sb->s_mtd;
        c->os_priv = sb;
 
+       if (c->mount_opts.rp_size > c->mtd->size)
+               return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB",
+                             c->mtd->size / 1024);
+
        /* Initialize JFFS2 superblock locks, the further initialization will
         * be done later */
        mutex_init(&c->alloc_sem);
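
With the update hook above, rp_size now takes effect on remount as well; a hedged sketch of the call from userspace (mount point and size are hypothetical, and the option value is given in KiB):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /* resize the reserved pool of a mounted jffs2 fs to 64 KiB */
        if (mount(NULL, "/mnt/flash", NULL, MS_REMOUNT, "rp_size=64") < 0) {
                perror("mount");
                return 1;
        }
        return 0;
}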
index af375e0..ec8ae42 100644 (file)
@@ -663,6 +663,8 @@ const struct file_operations orangefs_file_operations = {
        .unlocked_ioctl = orangefs_ioctl,
        .mmap           = orangefs_file_mmap,
        .open           = generic_file_open,
+       .splice_read    = generic_file_splice_read,
+       .splice_write   = iter_file_splice_write,
        .flush          = orangefs_flush,
        .release        = orangefs_file_release,
        .fsync          = orangefs_fsync,
index 0886d83..51a7c8c 100644 (file)
@@ -337,8 +337,10 @@ int ubifs_init_authentication(struct ubifs_info *c)
        c->authenticated = true;
 
        c->log_hash = ubifs_hash_get_desc(c);
-       if (IS_ERR(c->log_hash))
+       if (IS_ERR(c->log_hash)) {
+               err = PTR_ERR(c->log_hash);
                goto out_free_hmac;
+       }
 
        err = 0;
 
index b5cdac9..c4fc104 100644 (file)
@@ -701,13 +701,13 @@ out:
 
 out_dump:
        ubifs_err(c, "dumping index node (iip=%d)", i->iip);
-       ubifs_dump_node(c, idx);
+       ubifs_dump_node(c, idx, ubifs_idx_node_sz(c, c->fanout));
        list_del(&i->list);
        kfree(i);
        if (!list_empty(&list)) {
                i = list_entry(list.prev, struct idx_node, list);
                ubifs_err(c, "dumping parent index node");
-               ubifs_dump_node(c, &i->idx);
+               ubifs_dump_node(c, &i->idx, ubifs_idx_node_sz(c, c->fanout));
        }
 out_free:
        while (!list_empty(&list)) {
index ebff43f..1bbb9fe 100644 (file)
@@ -291,9 +291,9 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
        kfree(pdent);
 }
 
-void ubifs_dump_node(const struct ubifs_info *c, const void *node)
+void ubifs_dump_node(const struct ubifs_info *c, const void *node, int node_len)
 {
-       int i, n;
+       int i, n, type, safe_len, max_node_len, min_node_len;
        union ubifs_key key;
        const struct ubifs_ch *ch = node;
        char key_buf[DBG_KEY_BUF_LEN];
@@ -306,10 +306,40 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
                return;
        }
 
+       /* Skip dumping unknown type node */
+       type = ch->node_type;
+       if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
+               pr_err("node type %d was not recognized\n", type);
+               return;
+       }
+
        spin_lock(&dbg_lock);
        dump_ch(node);
 
-       switch (ch->node_type) {
+       if (c->ranges[type].max_len == 0) {
+               max_node_len = min_node_len = c->ranges[type].len;
+       } else {
+               max_node_len = c->ranges[type].max_len;
+               min_node_len = c->ranges[type].min_len;
+       }
+       safe_len = le32_to_cpu(ch->len);
+       safe_len = safe_len > 0 ? safe_len : 0;
+       safe_len = min3(safe_len, max_node_len, node_len);
+       if (safe_len < min_node_len) {
+               pr_err("node len(%d) is too short for %s, left %d bytes:\n",
+                      safe_len, dbg_ntype(type),
+                      safe_len > UBIFS_CH_SZ ?
+                      safe_len - (int)UBIFS_CH_SZ : 0);
+               if (safe_len > UBIFS_CH_SZ)
+                       print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1,
+                                      (void *)node + UBIFS_CH_SZ,
+                                      safe_len - UBIFS_CH_SZ, 0);
+               goto out_unlock;
+       }
+       if (safe_len != le32_to_cpu(ch->len))
+               pr_err("\ttruncated node length      %d\n", safe_len);
+
+       switch (type) {
        case UBIFS_PAD_NODE:
        {
                const struct ubifs_pad_node *pad = node;
@@ -453,7 +483,8 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
                pr_err("\tnlen           %d\n", nlen);
                pr_err("\tname           ");
 
-               if (nlen > UBIFS_MAX_NLEN)
+               if (nlen > UBIFS_MAX_NLEN ||
+                   nlen > safe_len - UBIFS_DENT_NODE_SZ)
                        pr_err("(bad name length, not printing, bad or corrupted node)");
                else {
                        for (i = 0; i < nlen && dent->name[i]; i++)
@@ -467,7 +498,6 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
        case UBIFS_DATA_NODE:
        {
                const struct ubifs_data_node *dn = node;
-               int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
 
                key_read(c, &dn->key, &key);
                pr_err("\tkey            %s\n",
@@ -475,10 +505,13 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
                pr_err("\tsize           %u\n", le32_to_cpu(dn->size));
                pr_err("\tcompr_typ      %d\n",
                       (int)le16_to_cpu(dn->compr_type));
-               pr_err("\tdata size      %d\n", dlen);
-               pr_err("\tdata:\n");
+               pr_err("\tdata size      %u\n",
+                      le32_to_cpu(ch->len) - (unsigned int)UBIFS_DATA_NODE_SZ);
+               pr_err("\tdata (length = %d):\n",
+                      safe_len - (int)UBIFS_DATA_NODE_SZ);
                print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
-                              (void *)&dn->data, dlen, 0);
+                              (void *)&dn->data,
+                              safe_len - (int)UBIFS_DATA_NODE_SZ, 0);
                break;
        }
        case UBIFS_TRUN_NODE:
@@ -495,13 +528,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
        case UBIFS_IDX_NODE:
        {
                const struct ubifs_idx_node *idx = node;
+               int max_child_cnt = (safe_len - UBIFS_IDX_NODE_SZ) /
+                                   (ubifs_idx_node_sz(c, 1) -
+                                   UBIFS_IDX_NODE_SZ);
 
-               n = le16_to_cpu(idx->child_cnt);
-               pr_err("\tchild_cnt      %d\n", n);
+               n = min_t(int, le16_to_cpu(idx->child_cnt), max_child_cnt);
+               pr_err("\tchild_cnt      %d\n", (int)le16_to_cpu(idx->child_cnt));
                pr_err("\tlevel          %d\n", (int)le16_to_cpu(idx->level));
                pr_err("\tBranches:\n");
 
-               for (i = 0; i < n && i < c->fanout - 1; i++) {
+               for (i = 0; i < n && i < c->fanout; i++) {
                        const struct ubifs_branch *br;
 
                        br = ubifs_idx_branch(c, idx, i);
@@ -525,7 +561,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
                                le64_to_cpu(orph->cmt_no) & LLONG_MAX);
                pr_err("\tlast node flag %llu\n",
                       (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
-               n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
+               n = (safe_len - UBIFS_ORPH_NODE_SZ) >> 3;
                pr_err("\t%d orphan inode numbers:\n", n);
                for (i = 0; i < n; i++)
                        pr_err("\t  ino %llu\n",
@@ -537,9 +573,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
                break;
        }
        default:
-               pr_err("node type %d was not recognized\n",
-                      (int)ch->node_type);
+               pr_err("node type %d was not recognized\n", type);
        }
+
+out_unlock:
        spin_unlock(&dbg_lock);
 }
 
@@ -764,7 +801,7 @@ void ubifs_dump_lpt_info(struct ubifs_info *c)
        pr_err("\tnnode_sz:      %d\n", c->nnode_sz);
        pr_err("\tltab_sz:       %d\n", c->ltab_sz);
        pr_err("\tlsave_sz:      %d\n", c->lsave_sz);
-       pr_err("\tbig_lpt:       %d\n", c->big_lpt);
+       pr_err("\tbig_lpt:       %u\n", c->big_lpt);
        pr_err("\tlpt_hght:      %d\n", c->lpt_hght);
        pr_err("\tpnode_cnt:     %d\n", c->pnode_cnt);
        pr_err("\tnnode_cnt:     %d\n", c->nnode_cnt);
@@ -791,22 +828,6 @@ void ubifs_dump_lpt_info(struct ubifs_info *c)
        spin_unlock(&dbg_lock);
 }
 
-void ubifs_dump_sleb(const struct ubifs_info *c,
-                    const struct ubifs_scan_leb *sleb, int offs)
-{
-       struct ubifs_scan_node *snod;
-
-       pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n",
-              current->pid, sleb->lnum, offs);
-
-       list_for_each_entry(snod, &sleb->nodes, list) {
-               cond_resched();
-               pr_err("Dumping node at LEB %d:%d len %d\n",
-                      sleb->lnum, snod->offs, snod->len);
-               ubifs_dump_node(c, snod->node);
-       }
-}
-
 void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
 {
        struct ubifs_scan_leb *sleb;
@@ -834,7 +855,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
                cond_resched();
                pr_err("Dumping node at LEB %d:%d len %d\n", lnum,
                       snod->offs, snod->len);
-               ubifs_dump_node(c, snod->node);
+               ubifs_dump_node(c, snod->node, c->leb_size - snod->offs);
        }
 
        pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum);
@@ -1012,7 +1033,7 @@ void dbg_save_space_info(struct ubifs_info *c)
  *
  * This function compares current flash space information with the information
  * which was saved when the 'dbg_save_space_info()' function was called.
- * Returns zero if the information has not changed, and %-EINVAL it it has
+ * Returns zero if the information has not changed, and %-EINVAL if it has
  * changed.
  */
 int dbg_check_space_info(struct ubifs_info *c)
@@ -1212,7 +1233,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
                ubifs_err(c, "but it should have key %s according to tnc",
                          dbg_snprintf_key(c, &zbr1->key, key_buf,
                                           DBG_KEY_BUF_LEN));
-               ubifs_dump_node(c, dent1);
+               ubifs_dump_node(c, dent1, UBIFS_MAX_DENT_NODE_SZ);
                goto out_free;
        }
 
@@ -1224,7 +1245,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
                ubifs_err(c, "but it should have key %s according to tnc",
                          dbg_snprintf_key(c, &zbr2->key, key_buf,
                                           DBG_KEY_BUF_LEN));
-               ubifs_dump_node(c, dent2);
+               ubifs_dump_node(c, dent2, UBIFS_MAX_DENT_NODE_SZ);
                goto out_free;
        }
 
@@ -1243,9 +1264,9 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
                          dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
 
        ubifs_msg(c, "first node at %d:%d\n", zbr1->lnum, zbr1->offs);
-       ubifs_dump_node(c, dent1);
+       ubifs_dump_node(c, dent1, UBIFS_MAX_DENT_NODE_SZ);
        ubifs_msg(c, "second node at %d:%d\n", zbr2->lnum, zbr2->offs);
-       ubifs_dump_node(c, dent2);
+       ubifs_dump_node(c, dent2, UBIFS_MAX_DENT_NODE_SZ);
 
 out_free:
        kfree(dent2);
@@ -2110,7 +2131,7 @@ out:
 
 out_dump:
        ubifs_msg(c, "dump of node at LEB %d:%d", zbr->lnum, zbr->offs);
-       ubifs_dump_node(c, node);
+       ubifs_dump_node(c, node, zbr->len);
 out_free:
        kfree(node);
        return err;
@@ -2243,7 +2264,7 @@ out_dump:
 
        ubifs_msg(c, "dump of the inode %lu sitting in LEB %d:%d",
                  (unsigned long)fscki->inum, zbr->lnum, zbr->offs);
-       ubifs_dump_node(c, ino);
+       ubifs_dump_node(c, ino, zbr->len);
        kfree(ino);
        return -EINVAL;
 }
@@ -2314,12 +2335,12 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
 
                if (sa->type != UBIFS_DATA_NODE) {
                        ubifs_err(c, "bad node type %d", sa->type);
-                       ubifs_dump_node(c, sa->node);
+                       ubifs_dump_node(c, sa->node, c->leb_size - sa->offs);
                        return -EINVAL;
                }
                if (sb->type != UBIFS_DATA_NODE) {
                        ubifs_err(c, "bad node type %d", sb->type);
-                       ubifs_dump_node(c, sb->node);
+                       ubifs_dump_node(c, sb->node, c->leb_size - sb->offs);
                        return -EINVAL;
                }
 
@@ -2350,8 +2371,8 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
        return 0;
 
 error_dump:
-       ubifs_dump_node(c, sa->node);
-       ubifs_dump_node(c, sb->node);
+       ubifs_dump_node(c, sa->node, c->leb_size - sa->offs);
+       ubifs_dump_node(c, sb->node, c->leb_size - sb->offs);
        return -EINVAL;
 }
 
@@ -2382,13 +2403,13 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
                if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
                    sa->type != UBIFS_XENT_NODE) {
                        ubifs_err(c, "bad node type %d", sa->type);
-                       ubifs_dump_node(c, sa->node);
+                       ubifs_dump_node(c, sa->node, c->leb_size - sa->offs);
                        return -EINVAL;
                }
                if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE &&
                    sb->type != UBIFS_XENT_NODE) {
                        ubifs_err(c, "bad node type %d", sb->type);
-                       ubifs_dump_node(c, sb->node);
+                       ubifs_dump_node(c, sb->node, c->leb_size - sb->offs);
                        return -EINVAL;
                }
 
@@ -2438,11 +2459,10 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 
 error_dump:
        ubifs_msg(c, "dumping first node");
-       ubifs_dump_node(c, sa->node);
+       ubifs_dump_node(c, sa->node, c->leb_size - sa->offs);
        ubifs_msg(c, "dumping second node");
-       ubifs_dump_node(c, sb->node);
+       ubifs_dump_node(c, sb->node, c->leb_size - sb->offs);
        return -EINVAL;
-       return 0;
 }
 
 static inline int chance(unsigned int n, unsigned int out_of)
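
The debug.c hunks above thread a caller-supplied node_len through ubifs_dump_node() and clamp every length derived from the on-media header (ch->len, child_cnt, the orphan count) against it, so a corrupted header can no longer push print_hex_dump() or the branch loop past the end of the buffer. A minimal userspace sketch of the clamping idea; dump_data_node() and NODE_HDR_SZ are illustrative stand-ins, not the kernel API:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define NODE_HDR_SZ 24  /* illustrative stand-in for UBIFS_DATA_NODE_SZ */

    /*
     * Dump at most node_len bytes: the length claimed by the (possibly
     * corrupted) node header is honoured only after clamping it against
     * what the caller actually read.
     */
    static void dump_data_node(const uint8_t *buf, int claimed_len, int node_len)
    {
            int safe_len = claimed_len < node_len ? claimed_len : node_len;

            if (safe_len < NODE_HDR_SZ) {
                    fprintf(stderr, "truncated node (%d bytes)\n", safe_len);
                    return;
            }
            printf("data (length = %d):\n", safe_len - NODE_HDR_SZ);
            for (int i = NODE_HDR_SZ; i < safe_len; i++)
                    printf("%02x%c", buf[i],
                           ((i - NODE_HDR_SZ) % 16 == 15) ? '\n' : ' ');
            putchar('\n');
    }

    int main(void)
    {
            uint8_t node[64];

            memset(node, 0xab, sizeof(node));
            /* The header claims 4096 bytes, but only 64 were read. */
            dump_data_node(node, 4096, sizeof(node));
            return 0;
    }
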
index 7763639..ed96610 100644
@@ -242,7 +242,8 @@ const char *dbg_get_key_dump(const struct ubifs_info *c,
 const char *dbg_snprintf_key(const struct ubifs_info *c,
                             const union ubifs_key *key, char *buffer, int len);
 void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode);
-void ubifs_dump_node(const struct ubifs_info *c, const void *node);
+void ubifs_dump_node(const struct ubifs_info *c, const void *node,
+                    int node_len);
 void ubifs_dump_budget_req(const struct ubifs_budget_req *req);
 void ubifs_dump_lstats(const struct ubifs_lp_stats *lst);
 void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
@@ -251,8 +252,6 @@ void ubifs_dump_lprop(const struct ubifs_info *c,
 void ubifs_dump_lprops(struct ubifs_info *c);
 void ubifs_dump_lpt_info(struct ubifs_info *c);
 void ubifs_dump_leb(const struct ubifs_info *c, int lnum);
-void ubifs_dump_sleb(const struct ubifs_info *c,
-                    const struct ubifs_scan_leb *sleb, int offs);
 void ubifs_dump_znode(const struct ubifs_info *c,
                      const struct ubifs_znode *znode);
 void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
index 7949d7c..9a6b866 100644
@@ -844,7 +844,7 @@ out_fname:
  *
  * This function checks if directory @dir is empty. Returns zero if the
  * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes
- * in case of of errors.
+ * in case of errors.
  */
 int ubifs_check_dir_empty(struct inode *dir)
 {
@@ -1632,9 +1632,7 @@ const struct inode_operations ubifs_dir_inode_operations = {
        .rename      = ubifs_rename,
        .setattr     = ubifs_setattr,
        .getattr     = ubifs_getattr,
-#ifdef CONFIG_UBIFS_FS_XATTR
        .listxattr   = ubifs_listxattr,
-#endif
        .update_time = ubifs_update_time,
        .tmpfile     = ubifs_tmpfile,
 };
index b77d163..2bc7780 100644
@@ -92,7 +92,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
 dump:
        ubifs_err(c, "bad data node (block %u, inode %lu)",
                  block, inode->i_ino);
-       ubifs_dump_node(c, dn);
+       ubifs_dump_node(c, dn, UBIFS_MAX_DATA_NODE_SZ);
        return -EINVAL;
 }
 
@@ -205,7 +205,7 @@ static void release_new_page_budget(struct ubifs_info *c)
  * @c: UBIFS file-system description object
  *
  * This is a helper function which releases budget corresponding to the budget
- * of changing one one page of data which already exists on the flash media.
+ * of changing one page of data which already exists on the flash media.
  */
 static void release_existing_page_budget(struct ubifs_info *c)
 {
@@ -1645,9 +1645,7 @@ const struct address_space_operations ubifs_file_address_operations = {
 const struct inode_operations ubifs_file_inode_operations = {
        .setattr     = ubifs_setattr,
        .getattr     = ubifs_getattr,
-#ifdef CONFIG_UBIFS_FS_XATTR
        .listxattr   = ubifs_listxattr,
-#endif
        .update_time = ubifs_update_time,
 };
 
@@ -1655,9 +1653,7 @@ const struct inode_operations ubifs_symlink_inode_operations = {
        .get_link    = ubifs_get_link,
        .setattr     = ubifs_setattr,
        .getattr     = ubifs_getattr,
-#ifdef CONFIG_UBIFS_FS_XATTR
        .listxattr   = ubifs_listxattr,
-#endif
        .update_time = ubifs_update_time,
 };
 
index 7e4bfaf..00b61db 100644
@@ -198,6 +198,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
  * ubifs_check_node - check node.
  * @c: UBIFS file-system description object
  * @buf: node to check
+ * @len: node length
  * @lnum: logical eraseblock number
  * @offs: offset within the logical eraseblock
  * @quiet: print no messages
@@ -222,10 +223,10 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
  * This function returns zero in case of success and %-EUCLEAN in case of bad
  * CRC or magic.
  */
-int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
-                    int offs, int quiet, int must_chk_crc)
+int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len,
+                    int lnum, int offs, int quiet, int must_chk_crc)
 {
-       int err = -EINVAL, type, node_len, dump_node = 1;
+       int err = -EINVAL, type, node_len;
        uint32_t crc, node_crc, magic;
        const struct ubifs_ch *ch = buf;
 
@@ -278,22 +279,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
 out_len:
        if (!quiet)
                ubifs_err(c, "bad node length %d", node_len);
-       if (type == UBIFS_DATA_NODE && node_len > UBIFS_DATA_NODE_SZ)
-               dump_node = 0;
 out:
        if (!quiet) {
                ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
-               if (dump_node) {
-                       ubifs_dump_node(c, buf);
-               } else {
-                       int safe_len = min3(node_len, c->leb_size - offs,
-                               (int)UBIFS_MAX_DATA_NODE_SZ);
-                       pr_err("\tprevent out-of-bounds memory access\n");
-                       pr_err("\ttruncated data node length      %d\n", safe_len);
-                       pr_err("\tcorrupted data node:\n");
-                       print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
-                                       buf, safe_len, 0);
-               }
+               ubifs_dump_node(c, buf, len);
                dump_stack();
        }
        return err;
@@ -319,7 +308,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
 {
        uint32_t crc;
 
-       ubifs_assert(c, pad >= 0 && !(pad & 7));
+       ubifs_assert(c, pad >= 0);
 
        if (pad >= UBIFS_PAD_NODE_SZ) {
                struct ubifs_ch *ch = buf;
@@ -730,7 +719,7 @@ out_timers:
 int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 {
        struct ubifs_info *c = wbuf->c;
-       int err, written, n, aligned_len = ALIGN(len, 8);
+       int err, n, written = 0, aligned_len = ALIGN(len, 8);
 
        dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
               dbg_ntype(((struct ubifs_ch *)buf)->node_type),
@@ -764,6 +753,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
                 * write-buffer.
                 */
                memcpy(wbuf->buf + wbuf->used, buf, len);
+               if (aligned_len > len) {
+                       ubifs_assert(c, aligned_len - len < 8);
+                       ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len);
+               }
 
                if (aligned_len == wbuf->avail) {
                        dbg_io("flush jhead %s wbuf to LEB %d:%d",
@@ -793,8 +786,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
                goto exit;
        }
 
-       written = 0;
-
        if (wbuf->used) {
                /*
                 * The node is large enough and does not fit entirely within
@@ -856,13 +847,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
        }
 
        spin_lock(&wbuf->lock);
-       if (aligned_len)
+       if (aligned_len) {
                /*
                 * And now we have what's left and what does not take whole
                 * max. write unit, so write it to the write-buffer and we are
                 * done.
                 */
                memcpy(wbuf->buf, buf + written, len);
+               if (aligned_len > len) {
+                       ubifs_assert(c, aligned_len - len < 8);
+                       ubifs_pad(c, wbuf->buf + len, aligned_len - len);
+               }
+       }
 
        if (c->leb_size - wbuf->offs >= c->max_write_size)
                wbuf->size = c->max_write_size;
@@ -890,7 +886,7 @@ exit:
 out:
        ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d",
                  len, wbuf->lnum, wbuf->offs, err);
-       ubifs_dump_node(c, buf);
+       ubifs_dump_node(c, buf, written + len);
        dump_stack();
        ubifs_dump_leb(c, wbuf->lnum);
        return err;
@@ -933,7 +929,7 @@ int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum,
 
        err = ubifs_leb_write(c, lnum, buf, offs, buf_len);
        if (err)
-               ubifs_dump_node(c, buf);
+               ubifs_dump_node(c, buf, len);
 
        return err;
 }
@@ -1016,7 +1012,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
                goto out;
        }
 
-       err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
+       err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0);
        if (err) {
                ubifs_err(c, "expected node type %d", type);
                return err;
@@ -1032,7 +1028,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 
 out:
        ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
-       ubifs_dump_node(c, buf);
+       ubifs_dump_node(c, buf, len);
        dump_stack();
        return -EINVAL;
 }
@@ -1046,7 +1042,7 @@ out:
  * @lnum: logical eraseblock number
  * @offs: offset within the logical eraseblock
  *
- * This function reads a node of known type and and length, checks it and
+ * This function reads a node of known type and length, checks it and
  * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched
  * and a negative error code in case of failure.
  */
@@ -1072,7 +1068,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
                goto out;
        }
 
-       err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
+       err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0);
        if (err) {
                ubifs_errc(c, "expected node type %d", type);
                return err;
@@ -1090,7 +1086,7 @@ out:
        ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum,
                   offs, ubi_is_mapped(c->ubi, lnum));
        if (!c->probing) {
-               ubifs_dump_node(c, buf);
+               ubifs_dump_node(c, buf, len);
                dump_stack();
        }
        return -EINVAL;
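
The ubifs_wbuf_write_nolock() hunks above pad the gap between a node's real length and its 8-byte aligned length inside the write-buffer, rather than letting stale buffer contents reach the flash; the relaxed assertion in ubifs_pad() (pad >= 0 instead of a multiple of 8) is what permits these sub-8-byte pads. A standalone sketch of the alignment arithmetic, with memset() standing in for ubifs_pad():

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    #define ALIGN8(x) (((x) + 7) & ~7)

    /*
     * Copy len bytes into the write-buffer at offset used, then pad the
     * rest of the 8-byte aligned span so no stale data reaches the flash.
     */
    static int wbuf_put(unsigned char *wbuf, int used, const void *buf, int len)
    {
            int aligned_len = ALIGN8(len);

            memcpy(wbuf + used, buf, len);
            if (aligned_len > len) {
                    assert(aligned_len - len < 8);
                    memset(wbuf + used + len, 0, aligned_len - len);
            }
            return used + aligned_len;
    }

    int main(void)
    {
            unsigned char wbuf[32];
            int used;

            memset(wbuf, 0xff, sizeof(wbuf));      /* stale contents */
            used = wbuf_put(wbuf, 0, "hello", 5);  /* pads bytes 5..7 */
            printf("used=%d tail=%02x %02x %02x\n",
                   used, wbuf[5], wbuf[6], wbuf[7]);
            return 0;
    }
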
index 091c2ad..03410ae 100644
@@ -1559,7 +1559,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
                        if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) {
                                ubifs_err(c, "bad data node (block %u, inode %lu)",
                                          blk, inode->i_ino);
-                               ubifs_dump_node(c, dn);
+                               ubifs_dump_node(c, dn, sz - UBIFS_INO_NODE_SZ -
+                                               UBIFS_TRUN_NODE_SZ);
                                goto out_free;
                        }
 
index 6e0a153..778a22b 100644
@@ -851,7 +851,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
        dbg_lp("lsave_sz %d", c->lsave_sz);
        dbg_lp("lsave_cnt %d", c->lsave_cnt);
        dbg_lp("lpt_hght %d", c->lpt_hght);
-       dbg_lp("big_lpt %d", c->big_lpt);
+       dbg_lp("big_lpt %u", c->big_lpt);
        dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
        dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
        dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
@@ -1824,7 +1824,7 @@ static int lpt_init_rd(struct ubifs_info *c)
        dbg_lp("lsave_sz %d", c->lsave_sz);
        dbg_lp("lsave_cnt %d", c->lsave_cnt);
        dbg_lp("lpt_hght %d", c->lpt_hght);
-       dbg_lp("big_lpt %d", c->big_lpt);
+       dbg_lp("big_lpt %u", c->big_lpt);
        dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
        dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
        dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
index 911d055..0df9a3d 100644
@@ -314,7 +314,7 @@ static int validate_master(const struct ubifs_info *c)
 
 out:
        ubifs_err(c, "bad master node at offset %d error %d", c->mst_offs, err);
-       ubifs_dump_node(c, c->mst_node);
+       ubifs_dump_node(c, c->mst_node, c->mst_node_alsz);
        return -EINVAL;
 }
 
@@ -392,7 +392,7 @@ int ubifs_read_master(struct ubifs_info *c)
                if (c->leb_cnt < old_leb_cnt ||
                    c->leb_cnt < UBIFS_MIN_LEB_CNT) {
                        ubifs_err(c, "bad leb_cnt on master node");
-                       ubifs_dump_node(c, c->mst_node);
+                       ubifs_dump_node(c, c->mst_node, c->mst_node_alsz);
                        return -EINVAL;
                }
 
index 0fb6195..4909321 100644
@@ -646,7 +646,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
                if (snod->type != UBIFS_ORPH_NODE) {
                        ubifs_err(c, "invalid node type %d in orphan area at %d:%d",
                                  snod->type, sleb->lnum, snod->offs);
-                       ubifs_dump_node(c, snod->node);
+                       ubifs_dump_node(c, snod->node,
+                                       c->leb_size - snod->offs);
                        err = -EINVAL;
                        goto out_free;
                }
@@ -674,7 +675,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
                        if (!first) {
                                ubifs_err(c, "out of order commit number %llu in orphan node at %d:%d",
                                          cmt_no, sleb->lnum, snod->offs);
-                               ubifs_dump_node(c, snod->node);
+                               ubifs_dump_node(c, snod->node,
+                                               c->leb_size - snod->offs);
                                err = -EINVAL;
                                goto out_free;
                        }
index f116f7b..f0d51dd 100644
@@ -352,11 +352,11 @@ out_free:
        ubifs_err(c, "failed to recover master node");
        if (mst1) {
                ubifs_err(c, "dumping first master node");
-               ubifs_dump_node(c, mst1);
+               ubifs_dump_node(c, mst1, c->leb_size - ((void *)mst1 - buf1));
        }
        if (mst2) {
                ubifs_err(c, "dumping second master node");
-               ubifs_dump_node(c, mst2);
+               ubifs_dump_node(c, mst2, c->leb_size - ((void *)mst2 - buf2));
        }
        vfree(buf2);
        vfree(buf1);
@@ -469,7 +469,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
         * The area after the common header size is not empty, so the common
         * header must be intact. Check it.
         */
-       if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
+       if (ubifs_check_node(c, buf, len, lnum, offs, 1, 0) != -EUCLEAN) {
                dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
                return 0;
        }
index 2f8d8f4..79801c9 100644
@@ -574,7 +574,7 @@ static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_h
  * @c: UBIFS file-system description object
  * @sleb: the scan LEB to authenticate
  * @log_hash:
- * @is_last: if true, this is is the last LEB
+ * @is_last: if true, this is the last LEB
  *
  * This function iterates over the buds of a single LEB authenticating all buds
  * with the authentication nodes on this LEB. Authentication nodes are written
@@ -827,7 +827,7 @@ out:
 
 out_dump:
        ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs);
-       ubifs_dump_node(c, snod->node);
+       ubifs_dump_node(c, snod->node, c->leb_size - snod->offs);
        ubifs_scan_destroy(sleb);
        return -EINVAL;
 }
@@ -1123,7 +1123,7 @@ out:
 out_dump:
        ubifs_err(c, "log error detected while replaying the log at LEB %d:%d",
                  lnum, offs + snod->offs);
-       ubifs_dump_node(c, snod->node);
+       ubifs_dump_node(c, snod->node, c->leb_size - snod->offs);
        ubifs_scan_destroy(sleb);
        return -EINVAL;
 }
index c0d3e40..c160f71 100644
@@ -503,7 +503,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
 
 failed:
        ubifs_err(c, "bad superblock, error %d", err);
-       ubifs_dump_node(c, sup);
+       ubifs_dump_node(c, sup, ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size));
        return -EINVAL;
 }
 
index c69cdb5..84a9157 100644
@@ -76,7 +76,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
        dbg_scan("scanning %s at LEB %d:%d",
                 dbg_ntype(ch->node_type), lnum, offs);
 
-       if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
+       if (ubifs_check_node(c, buf, len, lnum, offs, quiet, 1))
                return SCANNED_A_CORRUPT_NODE;
 
        if (ch->node_type == UBIFS_PAD_NODE) {
@@ -90,7 +90,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
                        if (!quiet) {
                                ubifs_err(c, "bad pad node at LEB %d:%d",
                                          lnum, offs);
-                               ubifs_dump_node(c, pad);
+                               ubifs_dump_node(c, pad, len);
                        }
                        return SCANNED_A_BAD_PAD_NODE;
                }
index cb3acfb..138b942 100644
@@ -253,7 +253,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
 
 out_invalid:
        ubifs_err(c, "inode %lu validation failed, error %d", inode->i_ino, err);
-       ubifs_dump_node(c, ino);
+       ubifs_dump_node(c, ino, UBIFS_MAX_INO_NODE_SZ);
        ubifs_dump_inode(c, inode);
        err = -EINVAL;
 out_ino:
@@ -1572,7 +1572,7 @@ static int mount_ubifs(struct ubifs_info *c)
        dbg_gen("main area LEBs:      %d (%d - %d)",
                c->main_lebs, c->main_first, c->leb_cnt - 1);
        dbg_gen("index LEBs:          %d", c->lst.idx_lebs);
-       dbg_gen("total index bytes:   %lld (%lld KiB, %lld MiB)",
+       dbg_gen("total index bytes:   %llu (%llu KiB, %llu MiB)",
                c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
                c->bi.old_idx_sz >> 20);
        dbg_gen("key hash type:       %d", c->key_hash_type);
@@ -2207,9 +2207,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
        if (c->max_inode_sz > MAX_LFS_FILESIZE)
                sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
        sb->s_op = &ubifs_super_operations;
-#ifdef CONFIG_UBIFS_FS_XATTR
        sb->s_xattr = ubifs_xattr_handlers;
-#endif
        fscrypt_set_ops(sb, &ubifs_crypt_operations);
 
        mutex_lock(&c->umount_mutex);
index 894f1ab..488f3da 100644
@@ -316,7 +316,7 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr,
        err = ubifs_validate_entry(c, dent);
        if (err) {
                dump_stack();
-               ubifs_dump_node(c, dent);
+               ubifs_dump_node(c, dent, zbr->len);
                return err;
        }
 
@@ -349,7 +349,7 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr,
        err = ubifs_validate_entry(c, node);
        if (err) {
                dump_stack();
-               ubifs_dump_node(c, node);
+               ubifs_dump_node(c, node, zbr->len);
                return err;
        }
 
@@ -377,7 +377,7 @@ static void lnc_free(struct ubifs_zbranch *zbr)
  *
  * This function reads a "hashed" node defined by @zbr from the leaf node cache
  * (if it is there) or from the flash media, in which case the node is also
- * added to LNC. Returns zero in case of success or a negative negative error
+ * added to LNC. Returns zero in case of success or a negative error
  * code in case of failure.
  */
 static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
@@ -1699,7 +1699,7 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
                goto out_err;
        }
 
-       err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
+       err = ubifs_check_node(c, buf, zbr->len, zbr->lnum, zbr->offs, 0, 0);
        if (err) {
                ubifs_err(c, "expected node type %d", UBIFS_DATA_NODE);
                goto out;
@@ -1733,7 +1733,7 @@ out_err:
        err = -EINVAL;
 out:
        ubifs_err(c, "bad node at LEB %d:%d", zbr->lnum, zbr->offs);
-       ubifs_dump_node(c, buf);
+       ubifs_dump_node(c, buf, zbr->len);
        dump_stack();
        return err;
 }
index ccaf94e..4d686e3 100644
@@ -390,7 +390,7 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr,
 
 out_dump:
        ubifs_err(c, "bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
-       ubifs_dump_node(c, idx);
+       ubifs_dump_node(c, idx, c->max_idx_node_sz);
        kfree(idx);
        return -EINVAL;
 }
@@ -455,8 +455,7 @@ out:
  * @node: node is returned here
  *
  * This function reads a node defined by @zbr from the flash media. Returns
- * zero in case of success or a negative negative error code in case of
- * failure.
+ * zero in case of success or a negative error code in case of failure.
  */
 int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
                        void *node)
@@ -489,7 +488,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
                          zbr->lnum, zbr->offs);
                dbg_tnck(key, "looked for key ");
                dbg_tnck(&key1, "but found node's key ");
-               ubifs_dump_node(c, node);
+               ubifs_dump_node(c, node, zbr->len);
                return -EINVAL;
        }
 
index 4ffd832..fc2cdde 100644
@@ -1719,8 +1719,8 @@ int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
                     int offs);
 int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum,
                          int offs, int hmac_offs);
-int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
-                    int offs, int quiet, int must_chk_crc);
+int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len,
+                    int lnum, int offs, int quiet, int must_chk_crc);
 void ubifs_init_node(struct ubifs_info *c, void *buf, int len, int pad);
 void ubifs_crc_node(struct ubifs_info *c, void *buf, int len);
 void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
@@ -2000,17 +2000,19 @@ int ubifs_getattr(const struct path *path, struct kstat *stat,
 int ubifs_check_dir_empty(struct inode *dir);
 
 /* xattr.c */
-extern const struct xattr_handler *ubifs_xattr_handlers[];
-ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
                    size_t size, int flags, bool check_lock);
 ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
                        size_t size);
 
 #ifdef CONFIG_UBIFS_FS_XATTR
+extern const struct xattr_handler *ubifs_xattr_handlers[];
+ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum);
 int ubifs_purge_xattrs(struct inode *host);
 #else
+#define ubifs_listxattr NULL
+#define ubifs_xattr_handlers NULL
 static inline void ubifs_evict_xattr_inode(struct ubifs_info *c,
                                           ino_t xattr_inum) { }
 static inline int ubifs_purge_xattrs(struct inode *host)
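
Moving the ubifs_xattr_handlers and ubifs_listxattr declarations under CONFIG_UBIFS_FS_XATTR and defining them to NULL otherwise is what lets dir.c, file.c and super.c above drop their per-call-site #ifdefs. The pattern in isolation; CONFIG_FOO_XATTR and the foo_* names are hypothetical:

    #include <stdio.h>

    /* #define CONFIG_FOO_XATTR 1 */

    #ifdef CONFIG_FOO_XATTR
    static long foo_listxattr(const char *name)
    {
            printf("listing xattrs of %s\n", name);
            return 0;
    }
    #else
    /* Stub the symbol itself out, so callers need no #ifdef. */
    #define foo_listxattr NULL
    #endif

    struct foo_ops {
            long (*listxattr)(const char *name);
    };

    int main(void)
    {
            /* Unconditional assignment; NULL simply means "not supported". */
            struct foo_ops ops = { .listxattr = foo_listxattr };

            if (ops.listxattr)
                    ops.listxattr("user.example");
            else
                    printf("xattrs compiled out\n");
            return 0;
    }
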
index 1564001..7cb9f06 100644
@@ -690,9 +690,9 @@ xfs_alloc_read_agfl(
        xfs_mount_t     *mp,            /* mount point structure */
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_agnumber_t  agno,           /* allocation group number */
-       xfs_buf_t       **bpp)          /* buffer for the ag free block array */
+       struct xfs_buf  **bpp)          /* buffer for the ag free block array */
 {
-       xfs_buf_t       *bp;            /* return value */
+       struct xfs_buf  *bp;            /* return value */
        int             error;
 
        ASSERT(agno != NULLAGNUMBER);
@@ -2647,12 +2647,12 @@ out_no_agbp:
 int                            /* error */
 xfs_alloc_get_freelist(
        xfs_trans_t     *tp,    /* transaction pointer */
-       xfs_buf_t       *agbp,  /* buffer containing the agf structure */
+       struct xfs_buf  *agbp,  /* buffer containing the agf structure */
        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
        int             btreeblk) /* destination is an AGF btree */
 {
        struct xfs_agf  *agf = agbp->b_addr;
-       xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
+       struct xfs_buf  *agflbp;/* buffer for a.g. freelist structure */
        xfs_agblock_t   bno;    /* block number returned */
        __be32          *agfl_bno;
        int             error;
@@ -2711,7 +2711,7 @@ xfs_alloc_get_freelist(
 void
 xfs_alloc_log_agf(
        xfs_trans_t     *tp,    /* transaction pointer */
-       xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
+       struct xfs_buf  *bp,    /* buffer for a.g. freelist header */
        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
 {
        int     first;          /* first byte offset */
@@ -2757,7 +2757,7 @@ xfs_alloc_pagf_init(
        xfs_agnumber_t          agno,   /* allocation group number */
        int                     flags)  /* XFS_ALLOC_FLAGS_... */
 {
-       xfs_buf_t               *bp;
+       struct xfs_buf          *bp;
        int                     error;
 
        error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp);
@@ -2772,8 +2772,8 @@ xfs_alloc_pagf_init(
 int                                    /* error */
 xfs_alloc_put_freelist(
        xfs_trans_t             *tp,    /* transaction pointer */
-       xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
-       xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
+       struct xfs_buf          *agbp,  /* buffer for a.g. freelist header */
+       struct xfs_buf          *agflbp,/* buffer for a.g. free block array */
        xfs_agblock_t           bno,    /* block being freed */
        int             btreeblk) /* block came from an AGF btree */
 {
index d9a6924..bc44641 100644
@@ -321,7 +321,7 @@ xfs_bmap_check_leaf_extents(
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
        struct xfs_btree_block  *block; /* current btree block */
        xfs_fsblock_t           bno;    /* block # of "block" */
-       xfs_buf_t               *bp;    /* buffer for "block" */
+       struct xfs_buf          *bp;    /* buffer for "block" */
        int                     error;  /* error return value */
        xfs_extnum_t            i=0, j; /* index into the extents list */
        int                     level;  /* btree level, for checking */
@@ -592,7 +592,7 @@ xfs_bmap_btree_to_extents(
        struct xfs_btree_block  *rblock = ifp->if_broot;
        struct xfs_btree_block  *cblock;/* child btree block */
        xfs_fsblock_t           cbno;   /* child block number */
-       xfs_buf_t               *cbp;   /* child block's buffer */
+       struct xfs_buf          *cbp;   /* child block's buffer */
        int                     error;  /* error return value */
        __be64                  *pp;    /* ptr to block address */
        struct xfs_owner_info   oinfo;
@@ -830,7 +830,7 @@ xfs_bmap_local_to_extents(
        int             flags;          /* logging flags returned */
        struct xfs_ifork *ifp;          /* inode fork pointer */
        xfs_alloc_arg_t args;           /* allocation arguments */
-       xfs_buf_t       *bp;            /* buffer for extent block */
+       struct xfs_buf  *bp;            /* buffer for extent block */
        struct xfs_bmbt_irec rec;
        struct xfs_iext_cursor icur;
 
@@ -6226,23 +6226,17 @@ xfs_bmap_validate_extent(
        struct xfs_bmbt_irec    *irec)
 {
        struct xfs_mount        *mp = ip->i_mount;
-       xfs_fsblock_t           endfsb;
-       bool                    isrt;
 
-       isrt = XFS_IS_REALTIME_INODE(ip);
-       endfsb = irec->br_startblock + irec->br_blockcount - 1;
-       if (isrt && whichfork == XFS_DATA_FORK) {
-               if (!xfs_verify_rtbno(mp, irec->br_startblock))
-                       return __this_address;
-               if (!xfs_verify_rtbno(mp, endfsb))
+       if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
+               return __this_address;
+
+       if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) {
+               if (!xfs_verify_rtext(mp, irec->br_startblock,
+                                         irec->br_blockcount))
                        return __this_address;
        } else {
-               if (!xfs_verify_fsbno(mp, irec->br_startblock))
-                       return __this_address;
-               if (!xfs_verify_fsbno(mp, endfsb))
-                       return __this_address;
-               if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
-                   XFS_FSB_TO_AGNO(mp, endfsb))
+               if (!xfs_verify_fsbext(mp, irec->br_startblock,
+                                          irec->br_blockcount))
                        return __this_address;
        }
        if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
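
xfs_bmap_validate_extent() now delegates to whole-extent verifiers (xfs_verify_fileext(), xfs_verify_rtext(), xfs_verify_fsbext()) instead of checking the two endpoints separately; for the data device that includes the old requirement that an extent not cross an allocation group. A toy model of that invariant; the geometry constants and verify_fsbext() below are illustrative, not the xfs helpers:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define BLOCKS_PER_AG   1000u
    #define AG_COUNT        4u

    /*
     * An extent is valid iff it is non-empty, both endpoints exist on
     * the device, and both endpoints fall in the same allocation group.
     */
    static bool verify_fsbext(uint64_t start, uint64_t count)
    {
            uint64_t end = start + count - 1;

            if (count == 0 || end < start)                  /* overflow */
                    return false;
            if (end >= (uint64_t)BLOCKS_PER_AG * AG_COUNT)  /* off the device */
                    return false;
            return start / BLOCKS_PER_AG == end / BLOCKS_PER_AG;
    }

    int main(void)
    {
            printf("%d\n", verify_fsbext(10, 100));   /* 1: inside AG 0 */
            printf("%d\n", verify_fsbext(950, 100));  /* 0: crosses AG 0/1 */
            printf("%d\n", verify_fsbext(3990, 20));  /* 0: past the end */
            return 0;
    }
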
index ecec604..9766591 100644
@@ -639,8 +639,6 @@ xfs_bmbt_change_owner(
        ASSERT(XFS_IFORK_PTR(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE);
 
        cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
-       if (!cur)
-               return -ENOMEM;
        cur->bc_ino.flags |= XFS_BTCUR_BMBT_INVALID_OWNER;
 
        error = xfs_btree_change_owner(cur, new_owner, buffer_list);
index 2d25bab..c4d7a92 100644
@@ -397,7 +397,7 @@ xfs_btree_dup_cursor(
        xfs_btree_cur_t *cur,           /* input cursor */
        xfs_btree_cur_t **ncur)         /* output cursor */
 {
-       xfs_buf_t       *bp;            /* btree block's buffer pointer */
+       struct xfs_buf  *bp;            /* btree block's buffer pointer */
        int             error;          /* error return value */
        int             i;              /* level number of btree block */
        xfs_mount_t     *mp;            /* mount structure for filesystem */
@@ -701,7 +701,7 @@ xfs_btree_firstrec(
        int                     level)  /* level to change */
 {
        struct xfs_btree_block  *block; /* generic btree block pointer */
-       xfs_buf_t               *bp;    /* buffer containing block */
+       struct xfs_buf          *bp;    /* buffer containing block */
 
        /*
         * Get the block pointer for this level.
@@ -731,7 +731,7 @@ xfs_btree_lastrec(
        int                     level)  /* level to change */
 {
        struct xfs_btree_block  *block; /* generic btree block pointer */
-       xfs_buf_t               *bp;    /* buffer containing block */
+       struct xfs_buf          *bp;    /* buffer containing block */
 
        /*
         * Get the block pointer for this level.
@@ -993,7 +993,7 @@ STATIC void
 xfs_btree_setbuf(
        xfs_btree_cur_t         *cur,   /* btree cursor */
        int                     lev,    /* level in btree */
-       xfs_buf_t               *bp)    /* new buffer to set */
+       struct xfs_buf          *bp)    /* new buffer to set */
 {
        struct xfs_btree_block  *b;     /* btree block */
 
@@ -1636,7 +1636,7 @@ xfs_btree_decrement(
        int                     *stat)          /* success/failure */
 {
        struct xfs_btree_block  *block;
-       xfs_buf_t               *bp;
+       struct xfs_buf          *bp;
        int                     error;          /* error return value */
        int                     lev;
        union xfs_btree_ptr     ptr;
@@ -4070,7 +4070,7 @@ xfs_btree_delrec(
         * surviving block, and log it.
         */
        xfs_btree_set_numrecs(left, lrecs + rrecs);
-       xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
+       xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
        xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
        xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
 
index dd764da..630388b 100644
@@ -468,11 +468,13 @@ xfs_sb_has_ro_compat_feature(
 #define XFS_SB_FEAT_INCOMPAT_SPINODES  (1 << 1)        /* sparse inode chunks */
 #define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2)        /* metadata UUID */
 #define XFS_SB_FEAT_INCOMPAT_BIGTIME   (1 << 3)        /* large timestamps */
+#define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4)      /* needs xfs_repair */
 #define XFS_SB_FEAT_INCOMPAT_ALL \
                (XFS_SB_FEAT_INCOMPAT_FTYPE|    \
                 XFS_SB_FEAT_INCOMPAT_SPINODES| \
                 XFS_SB_FEAT_INCOMPAT_META_UUID| \
-                XFS_SB_FEAT_INCOMPAT_BIGTIME)
+                XFS_SB_FEAT_INCOMPAT_BIGTIME| \
+                XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR)
 
 #define XFS_SB_FEAT_INCOMPAT_UNKNOWN   ~XFS_SB_FEAT_INCOMPAT_ALL
 static inline bool
@@ -584,6 +586,12 @@ static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp)
                (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT);
 }
 
+static inline bool xfs_sb_version_needsrepair(struct xfs_sb *sbp)
+{
+       return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+               (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR);
+}
+
 /*
  * end of superblock version macros
  */
@@ -625,7 +633,6 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
 #define XFS_B_TO_FSB(mp,b)     \
        ((((uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
 #define XFS_B_TO_FSBT(mp,b)    (((uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
-#define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask)
 
 /*
  * Allocation group header
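
The new NEEDSREPAIR flag follows the usual V5 incompat-feature scheme: a mask bit, membership in XFS_SB_FEAT_INCOMPAT_ALL so it is no longer "unknown", and a version predicate. A compact sketch of how such bits can gate a mount; the bit values and the reject-on-needsrepair policy here are illustrative, since the mount-time policy lives outside these hunks:

    #include <stdint.h>
    #include <stdio.h>

    #define FEAT_INCOMPAT_FTYPE             (1u << 0)
    #define FEAT_INCOMPAT_NEEDSREPAIR       (1u << 4)
    #define FEAT_INCOMPAT_ALL \
            (FEAT_INCOMPAT_FTYPE | FEAT_INCOMPAT_NEEDSREPAIR)
    #define FEAT_INCOMPAT_UNKNOWN           (~FEAT_INCOMPAT_ALL)

    /*
     * Refuse the mount if the superblock carries incompat bits this
     * kernel does not know, or is explicitly flagged as needing repair.
     */
    static int can_mount(uint32_t incompat)
    {
            if (incompat & FEAT_INCOMPAT_UNKNOWN)
                    return 0;  /* unknown feature: must not touch */
            if (incompat & FEAT_INCOMPAT_NEEDSREPAIR)
                    return 0;  /* run xfs_repair first */
            return 1;
    }

    int main(void)
    {
            printf("%d\n", can_mount(FEAT_INCOMPAT_FTYPE));        /* 1 */
            printf("%d\n", can_mount(FEAT_INCOMPAT_NEEDSREPAIR));  /* 0 */
            printf("%d\n", can_mount(1u << 9));                    /* 0 */
            return 0;
    }
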
index 974e71b..69b228f 100644
@@ -607,13 +607,13 @@ error:
 
 /*
  * Allocate new inodes in the allocation group specified by agbp.
- * Return 0 for success, else error code.
+ * Returns 0 if inodes were allocated in this AG; 1 if there was no space
+ * in this AG; or the usual negative error code.
  */
 STATIC int
 xfs_ialloc_ag_alloc(
        struct xfs_trans        *tp,
-       struct xfs_buf          *agbp,
-       int                     *alloc)
+       struct xfs_buf          *agbp)
 {
        struct xfs_agi          *agi;
        struct xfs_alloc_arg    args;
@@ -795,10 +795,9 @@ sparse_alloc:
                allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
        }
 
-       if (args.fsbno == NULLFSBLOCK) {
-               *alloc = 0;
-               return 0;
-       }
+       if (args.fsbno == NULLFSBLOCK)
+               return 1;
+
        ASSERT(args.len == args.minlen);
 
        /*
@@ -903,7 +902,6 @@ sparse_alloc:
         */
        xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
        xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
-       *alloc = 1;
        return 0;
 }
 
@@ -1570,7 +1568,7 @@ xfs_dialloc_ag_update_inobt(
  * The caller selected an AG for us, and made sure that free inodes are
  * available.
  */
-STATIC int
+int
 xfs_dialloc_ag(
        struct xfs_trans        *tp,
        struct xfs_buf          *agbp,
@@ -1682,65 +1680,78 @@ error_cur:
        return error;
 }
 
+static int
+xfs_dialloc_roll(
+       struct xfs_trans        **tpp,
+       struct xfs_buf          *agibp)
+{
+       struct xfs_trans        *tp = *tpp;
+       struct xfs_dquot_acct   *dqinfo;
+       int                     error;
+
+       /*
+        * Hold on to the agibp across the commit so no other allocation can
+        * come in and take the free inodes we just allocated for our caller.
+        */
+       xfs_trans_bhold(tp, agibp);
+
+       /*
+        * We want the quota changes to be associated with the next transaction,
+        * NOT this one. So, detach the dqinfo from this and attach it to the
+        * next transaction.
+        */
+       dqinfo = tp->t_dqinfo;
+       tp->t_dqinfo = NULL;
+
+       error = xfs_trans_roll(&tp);
+
+       /* Re-attach the quota info that we detached from prev trx. */
+       tp->t_dqinfo = dqinfo;
+
+       *tpp = tp;
+       if (error)
+               return error;
+       xfs_trans_bjoin(tp, agibp);
+       return 0;
+}
+
 /*
- * Allocate an inode on disk.
- *
- * Mode is used to tell whether the new inode will need space, and whether it
- * is a directory.
+ * Select and prepare an AG for inode allocation.
  *
- * This function is designed to be called twice if it has to do an allocation
- * to make more free inodes.  On the first call, *IO_agbp should be set to NULL.
- * If an inode is available without having to performn an allocation, an inode
- * number is returned.  In this case, *IO_agbp is set to NULL.  If an allocation
- * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp.
- * The caller should then commit the current transaction, allocate a
- * new transaction, and call xfs_dialloc() again, passing in the previous value
- * of *IO_agbp.  IO_agbp should be held across the transactions. Since the AGI
- * buffer is locked across the two calls, the second call is guaranteed to have
- * a free inode available.
+ * Mode is used to tell whether the new inode is a directory and hence where to
+ * locate it.
  *
- * Once we successfully pick an inode its number is returned and the on-disk
- * data structures are updated.  The inode itself is not read in, since doing so
- * would break ordering constraints with xfs_reclaim.
+ * This function will ensure that the selected AG has free inodes available to
+ * allocate from. The selected AGI will be returned locked to the caller, and it
+ * will allocate more free inodes if required. If no free inodes are found or
+ * can be allocated, no AGI will be returned.
  */
 int
-xfs_dialloc(
-       struct xfs_trans        *tp,
+xfs_dialloc_select_ag(
+       struct xfs_trans        **tpp,
        xfs_ino_t               parent,
        umode_t                 mode,
-       struct xfs_buf          **IO_agbp,
-       xfs_ino_t               *inop)
+       struct xfs_buf          **IO_agbp)
 {
-       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_mount        *mp = (*tpp)->t_mountp;
        struct xfs_buf          *agbp;
        xfs_agnumber_t          agno;
        int                     error;
-       int                     ialloced;
-       int                     noroom = 0;
+       bool                    noroom = false;
        xfs_agnumber_t          start_agno;
        struct xfs_perag        *pag;
        struct xfs_ino_geometry *igeo = M_IGEO(mp);
-       int                     okalloc = 1;
+       bool                    okalloc = true;
 
-       if (*IO_agbp) {
-               /*
-                * If the caller passes in a pointer to the AGI buffer,
-                * continue where we left off before.  In this case, we
-                * know that the allocation group has free inodes.
-                */
-               agbp = *IO_agbp;
-               goto out_alloc;
-       }
+       *IO_agbp = NULL;
 
        /*
         * We do not have an agbp, so select an initial allocation
         * group for inode allocation.
         */
-       start_agno = xfs_ialloc_ag_select(tp, parent, mode);
-       if (start_agno == NULLAGNUMBER) {
-               *inop = NULLFSINO;
+       start_agno = xfs_ialloc_ag_select(*tpp, parent, mode);
+       if (start_agno == NULLAGNUMBER)
                return 0;
-       }
 
        /*
         * If we have already hit the ceiling of inode blocks then clear
@@ -1753,8 +1764,8 @@ xfs_dialloc(
        if (igeo->maxicount &&
            percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
                                                        > igeo->maxicount) {
-               noroom = 1;
-               okalloc = 0;
+               noroom = true;
+               okalloc = false;
        }
 
        /*
@@ -1771,9 +1782,9 @@ xfs_dialloc(
                }
 
                if (!pag->pagi_init) {
-                       error = xfs_ialloc_pagi_init(mp, tp, agno);
+                       error = xfs_ialloc_pagi_init(mp, *tpp, agno);
                        if (error)
-                               goto out_error;
+                               break;
                }
 
                /*
@@ -1786,64 +1797,59 @@ xfs_dialloc(
                 * Then read in the AGI buffer and recheck with the AGI buffer
                 * lock held.
                 */
-               error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+               error = xfs_ialloc_read_agi(mp, *tpp, agno, &agbp);
                if (error)
-                       goto out_error;
+                       break;
 
                if (pag->pagi_freecount) {
                        xfs_perag_put(pag);
-                       goto out_alloc;
+                       goto found_ag;
                }
 
                if (!okalloc)
                        goto nextag_relse_buffer;
 
+               error = xfs_ialloc_ag_alloc(*tpp, agbp);
+               if (error < 0) {
+                       xfs_trans_brelse(*tpp, agbp);
 
-               error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
-               if (error) {
-                       xfs_trans_brelse(tp, agbp);
-
-                       if (error != -ENOSPC)
-                               goto out_error;
-
-                       xfs_perag_put(pag);
-                       *inop = NULLFSINO;
-                       return 0;
+                       if (error == -ENOSPC)
+                               error = 0;
+                       break;
                }
 
-               if (ialloced) {
+               if (error == 0) {
                        /*
-                        * We successfully allocated some inodes, return
-                        * the current context to the caller so that it
-                        * can commit the current transaction and call
-                        * us again where we left off.
+                        * We successfully allocated space for an inode cluster
+                        * in this AG.  Roll the transaction so that we can
+                        * allocate one of the new inodes.
                         */
                        ASSERT(pag->pagi_freecount > 0);
                        xfs_perag_put(pag);
 
-                       *IO_agbp = agbp;
-                       *inop = NULLFSINO;
-                       return 0;
+                       error = xfs_dialloc_roll(tpp, agbp);
+                       if (error) {
+                               xfs_buf_relse(agbp);
+                               return error;
+                       }
+                       goto found_ag;
                }
 
 nextag_relse_buffer:
-               xfs_trans_brelse(tp, agbp);
+               xfs_trans_brelse(*tpp, agbp);
 nextag:
                xfs_perag_put(pag);
                if (++agno == mp->m_sb.sb_agcount)
                        agno = 0;
-               if (agno == start_agno) {
-                       *inop = NULLFSINO;
+               if (agno == start_agno)
                        return noroom ? -ENOSPC : 0;
-               }
        }
 
-out_alloc:
-       *IO_agbp = NULL;
-       return xfs_dialloc_ag(tp, agbp, parent, inop);
-out_error:
        xfs_perag_put(pag);
        return error;
+found_ag:
+       *IO_agbp = agbp;
+       return 0;
 }
 
 /*
@@ -2453,7 +2459,7 @@ out_map:
 void
 xfs_ialloc_log_agi(
        xfs_trans_t     *tp,            /* transaction pointer */
-       xfs_buf_t       *bp,            /* allocation group header buffer */
+       struct xfs_buf  *bp,            /* allocation group header buffer */
        int             fields)         /* bitmask of fields to log */
 {
        int                     first;          /* first byte number */
@@ -2674,7 +2680,7 @@ xfs_ialloc_pagi_init(
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_agnumber_t  agno)           /* allocation group number */
 {
-       xfs_buf_t       *bp = NULL;
+       struct xfs_buf  *bp = NULL;
        int             error;
 
        error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
index 72b3468..3511086 100644
@@ -37,30 +37,26 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
  * Mode is used to tell whether the new inode will need space, and whether
  * it is a directory.
  *
- * To work within the constraint of one allocation per transaction,
- * xfs_dialloc() is designed to be called twice if it has to do an
- * allocation to make more free inodes.  If an inode is
- * available without an allocation, agbp would be set to the current
- * agbp and alloc_done set to false.
- * If an allocation needed to be done, agbp would be set to the
- * inode header of the allocation group and alloc_done set to true.
- * The caller should then commit the current transaction and allocate a new
- * transaction.  xfs_dialloc() should then be called again with
- * the agbp value returned from the previous call.
- *
- * Once we successfully pick an inode its number is returned and the
- * on-disk data structures are updated.  The inode itself is not read
- * in, since doing so would break ordering constraints with xfs_reclaim.
- *
- * *agbp should be set to NULL on the first call, *alloc_done set to FALSE.
+ * There are two phases to inode allocation: selecting an AG and ensuring
+ * that it contains free inodes, followed by allocating one of the free
+ * inodes. xfs_dialloc_select_ag() does the former and returns a locked AGI
+ * to the caller, ensuring that a followup call to xfs_dialloc_ag() will
+ * have free inodes to allocate from. xfs_dialloc_ag() will return the inode
+ * number of the free inode we allocated.
  */
 int                                    /* error */
-xfs_dialloc(
-       struct xfs_trans *tp,           /* transaction pointer */
+xfs_dialloc_select_ag(
+       struct xfs_trans **tpp,         /* pointer to transaction pointer */
        xfs_ino_t       parent,         /* parent inode (directory) */
        umode_t         mode,           /* mode bits for new inode */
-       struct xfs_buf  **agbp,         /* buf for a.g. inode header */
-       xfs_ino_t       *inop);         /* inode number allocated */
+       struct xfs_buf  **IO_agbp);
+
+int
+xfs_dialloc_ag(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_ino_t               parent,
+       xfs_ino_t               *inop);
 
 /*
  * Free disk inode.  Carefully avoids touching the incore inode, all
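
The refactor above splits inode allocation into two phases: xfs_dialloc_select_ag() pins an AGI that is guaranteed to hold free inodes, rolling the transaction after growing an AG while the AGI buffer stays held, and xfs_dialloc_ag() then hands one of them out. A toy model of the protocol; the in-memory "AGs" and function names are stand-ins, not xfs code:

    #include <stdio.h>

    struct ag {
            int id;
            int free_inodes;
    };

    static struct ag groups[3] = { { 0, 0 }, { 1, 0 }, { 2, 5 } };

    /*
     * Phase 1: select an AG with free inodes, refilling one if needed.
     * In the kernel this is where xfs_ialloc_ag_alloc() grows the AG and
     * xfs_dialloc_roll() rolls the transaction with the AGI still held.
     */
    static struct ag *dialloc_select_ag(void)
    {
            for (int i = 0; i < 3; i++)
                    if (groups[i].free_inodes > 0)
                            return &groups[i];
            return NULL;  /* ENOSPC */
    }

    /* Phase 2: cannot fail on the AG that phase 1 returned. */
    static int dialloc_ag(struct ag *ag)
    {
            return ag->id * 1000 + ag->free_inodes--;
    }

    int main(void)
    {
            struct ag *ag = dialloc_select_ag();

            if (!ag) {
                    puts("ENOSPC");
                    return 1;
            }
            printf("allocated inode %d from AG %d\n", dialloc_ag(ag), ag->id);
            return 0;
    }
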
index cc919a2..4c58316 100644
@@ -672,11 +672,6 @@ xfs_inobt_cur(
                return error;
 
        cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which);
-       if (!cur) {
-               xfs_trans_brelse(tp, *agi_bpp);
-               *agi_bpp = NULL;
-               return -ENOMEM;
-       }
        *curpp = cur;
        return 0;
 }
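
This hunk, like the xfs_bmbt_change_owner, xfs_refcount_finish_one and xfs_rmap_finish_one hunks, deletes the NULL check after an *_init_cursor() call; those error paths are dead only if cursor allocation can no longer fail (a __GFP_NOFAIL-style guarantee, which is an assumption here rather than something these hunks show). The calling-convention change in miniature, with hypothetical names:

    #include <stdio.h>
    #include <stdlib.h>

    struct cursor {
            int level;
    };

    /*
     * A constructor documented never to return NULL; a kernel would get
     * this from __GFP_NOFAIL, this sketch just retries the allocation.
     */
    static struct cursor *cursor_init(void)
    {
            struct cursor *cur;

            do {
                    cur = calloc(1, sizeof(*cur));
            } while (!cur);
            return cur;
    }

    int main(void)
    {
            struct cursor *cur = cursor_init();  /* no NULL check needed */

            printf("cursor at level %d\n", cur->level);
            free(cur);
            return 0;
    }
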
index c667c63..4d7410e 100644
@@ -547,10 +547,6 @@ xfs_dinode_verify(
        if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
                return __this_address;
 
-       /* don't let reflink and dax mix */
-       if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
-               return __this_address;
-
        /* COW extent size hint validation */
        fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
                        mode, flags, flags2);
index 2076627..2037b9f 100644
@@ -1179,10 +1179,6 @@ xfs_refcount_finish_one(
                        return error;
 
                rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
-               if (!rcur) {
-                       error = -ENOMEM;
-                       goto out_cur;
-               }
                rcur->bc_ag.refc.nr_ops = nr_ops;
                rcur->bc_ag.refc.shape_changes = shape_changes;
        }
@@ -1217,11 +1213,6 @@ xfs_refcount_finish_one(
                trace_xfs_refcount_finish_one_leftover(mp, agno, type,
                                bno, blockcount, new_agbno, *new_len);
        return error;
-
-out_cur:
-       xfs_trans_brelse(tp, agbp);
-
-       return error;
 }
 
 /*
index 2668ebe..10e0cf9 100644
@@ -2404,10 +2404,6 @@ xfs_rmap_finish_one(
                        return -EFSCORRUPTED;
 
                rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
-               if (!rcur) {
-                       error = -ENOMEM;
-                       goto out_cur;
-               }
        }
        *pcur = rcur;
 
@@ -2446,11 +2442,6 @@ xfs_rmap_finish_one(
                error = -EFSCORRUPTED;
        }
        return error;
-
-out_cur:
-       xfs_trans_brelse(tp, agbp);
-
-       return error;
 }
 
 /*
index 6c1aba1..fe3a495 100644
@@ -56,9 +56,9 @@ xfs_rtbuf_get(
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_rtblock_t   block,          /* block number in bitmap or summary */
        int             issum,          /* is summary not bitmap */
-       xfs_buf_t       **bpp)          /* output: buffer for the block */
+       struct xfs_buf  **bpp)          /* output: buffer for the block */
 {
-       xfs_buf_t       *bp;            /* block buffer, result */
+       struct xfs_buf  *bp;            /* block buffer, result */
        xfs_inode_t     *ip;            /* bitmap or summary inode */
        xfs_bmbt_irec_t map;
        int             nmap = 1;
@@ -101,7 +101,7 @@ xfs_rtfind_back(
        xfs_rtword_t    *b;             /* current word in buffer */
        int             bit;            /* bit number in the word */
        xfs_rtblock_t   block;          /* bitmap block number */
-       xfs_buf_t       *bp;            /* buf for the block */
+       struct xfs_buf  *bp;            /* buf for the block */
        xfs_rtword_t    *bufp;          /* starting word in buffer */
        int             error;          /* error value */
        xfs_rtblock_t   firstbit;       /* first useful bit in the word */
@@ -276,7 +276,7 @@ xfs_rtfind_forw(
        xfs_rtword_t    *b;             /* current word in buffer */
        int             bit;            /* bit number in the word */
        xfs_rtblock_t   block;          /* bitmap block number */
-       xfs_buf_t       *bp;            /* buf for the block */
+       struct xfs_buf  *bp;            /* buf for the block */
        xfs_rtword_t    *bufp;          /* starting word in buffer */
        int             error;          /* error value */
        xfs_rtblock_t   i;              /* current bit number rel. to start */
@@ -447,11 +447,11 @@ xfs_rtmodify_summary_int(
        int             log,            /* log2 of extent size */
        xfs_rtblock_t   bbno,           /* bitmap block number */
        int             delta,          /* change to make to summary info */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       struct xfs_buf  **rbpp,         /* in/out: summary block buffer */
        xfs_fsblock_t   *rsb,           /* in/out: summary block number */
        xfs_suminfo_t   *sum)           /* out: summary info for this block */
 {
-       xfs_buf_t       *bp;            /* buffer for the summary block */
+       struct xfs_buf  *bp;            /* buffer for the summary block */
        int             error;          /* error value */
        xfs_fsblock_t   sb;             /* summary fsblock */
        int             so;             /* index into the summary file */
@@ -517,7 +517,7 @@ xfs_rtmodify_summary(
        int             log,            /* log2 of extent size */
        xfs_rtblock_t   bbno,           /* bitmap block number */
        int             delta,          /* change to make to summary info */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       struct xfs_buf  **rbpp,         /* in/out: summary block buffer */
        xfs_fsblock_t   *rsb)           /* in/out: summary block number */
 {
        return xfs_rtmodify_summary_int(mp, tp, log, bbno,
@@ -539,7 +539,7 @@ xfs_rtmodify_range(
        xfs_rtword_t    *b;             /* current word in buffer */
        int             bit;            /* bit number in the word */
        xfs_rtblock_t   block;          /* bitmap block number */
-       xfs_buf_t       *bp;            /* buf for the block */
+       struct xfs_buf  *bp;            /* buf for the block */
        xfs_rtword_t    *bufp;          /* starting word in buffer */
        int             error;          /* error value */
        xfs_rtword_t    *first;         /* first used word in the buffer */
@@ -690,7 +690,7 @@ xfs_rtfree_range(
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_rtblock_t   start,          /* starting block to free */
        xfs_extlen_t    len,            /* length to free */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       struct xfs_buf  **rbpp,         /* in/out: summary block buffer */
        xfs_fsblock_t   *rsb)           /* in/out: summary block number */
 {
        xfs_rtblock_t   end;            /* end of the freed extent */
@@ -773,7 +773,7 @@ xfs_rtcheck_range(
        xfs_rtword_t    *b;             /* current word in buffer */
        int             bit;            /* bit number in the word */
        xfs_rtblock_t   block;          /* bitmap block number */
-       xfs_buf_t       *bp;            /* buf for the block */
+       struct xfs_buf  *bp;            /* buf for the block */
        xfs_rtword_t    *bufp;          /* starting word in buffer */
        int             error;          /* error value */
        xfs_rtblock_t   i;              /* current bit number rel. to start */
@@ -969,7 +969,7 @@ xfs_rtfree_extent(
        int             error;          /* error value */
        xfs_mount_t     *mp;            /* file system mount structure */
        xfs_fsblock_t   sb;             /* summary file block number */
-       xfs_buf_t       *sumbp = NULL;  /* summary file block buffer */
+       struct xfs_buf  *sumbp = NULL;  /* summary file block buffer */
 
        mp = tp->t_mountp;
 
index 5aeafa5..bbda117 100644 (file)
@@ -360,21 +360,18 @@ xfs_validate_sb_common(
                }
        }
 
-       if (sbp->sb_unit) {
-               if (!xfs_sb_version_hasdalign(sbp) ||
-                   sbp->sb_unit > sbp->sb_width ||
-                   (sbp->sb_width % sbp->sb_unit) != 0) {
-                       xfs_notice(mp, "SB stripe unit sanity check failed");
-                       return -EFSCORRUPTED;
-               }
-       } else if (xfs_sb_version_hasdalign(sbp)) {
+       /*
+        * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign)
+        * would imply the image is corrupted.
+        */
+       if (!!sbp->sb_unit ^ xfs_sb_version_hasdalign(sbp)) {
                xfs_notice(mp, "SB stripe alignment sanity check failed");
                return -EFSCORRUPTED;
-       } else if (sbp->sb_width) {
-               xfs_notice(mp, "SB stripe width sanity check failed");
-               return -EFSCORRUPTED;
        }
 
+       if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit),
+                       XFS_FSB_TO_B(mp, sbp->sb_width), 0, false))
+               return -EFSCORRUPTED;
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
            sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) {
@@ -383,17 +380,6 @@ xfs_validate_sb_common(
        }
 
        /*
-        * Until this is fixed only page-sized or smaller data blocks work.
-        */
-       if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
-               xfs_warn(mp,
-               "File system with blocksize %d bytes. "
-               "Only pagesize (%ld) or less will currently work.",
-                               sbp->sb_blocksize, PAGE_SIZE);
-               return -ENOSYS;
-       }
-
-       /*
         * Currently only very few inode sizes are supported.
         */
        switch (sbp->sb_inodesize) {
@@ -408,22 +394,6 @@ xfs_validate_sb_common(
                return -ENOSYS;
        }
 
-       if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
-           xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
-               xfs_warn(mp,
-               "file system too large to be mounted on this system.");
-               return -EFBIG;
-       }
-
-       /*
-        * Don't touch the filesystem if a user tool thinks it owns the primary
-        * superblock.  mkfs doesn't clear the flag from secondary supers, so
-        * we don't check them at all.
-        */
-       if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && sbp->sb_inprogress) {
-               xfs_warn(mp, "Offline file system operation in progress!");
-               return -EFSCORRUPTED;
-       }
        return 0;
 }
 
@@ -1233,3 +1203,61 @@ xfs_sb_get_secondary(
        *bpp = bp;
        return 0;
 }
+
+/*
+ * sunit, swidth and sectorsize (optional; pass 0 to skip that check) are
+ * all in bytes, so users won't be confused by the values in error messages.
+ */
+bool
+xfs_validate_stripe_geometry(
+       struct xfs_mount        *mp,
+       __s64                   sunit,
+       __s64                   swidth,
+       int                     sectorsize,
+       bool                    silent)
+{
+       if (swidth > INT_MAX) {
+               if (!silent)
+                       xfs_notice(mp,
+"stripe width (%lld) is too large", swidth);
+               return false;
+       }
+
+       if (sunit > swidth) {
+               if (!silent)
+                       xfs_notice(mp,
+"stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth);
+               return false;
+       }
+
+       if (sectorsize && (int)sunit % sectorsize) {
+               if (!silent)
+                       xfs_notice(mp,
+"stripe unit (%lld) must be a multiple of the sector size (%d)",
+                                  sunit, sectorsize);
+               return false;
+       }
+
+       if (sunit && !swidth) {
+               if (!silent)
+                       xfs_notice(mp,
+"invalid stripe unit (%lld) and stripe width of 0", sunit);
+               return false;
+       }
+
+       if (!sunit && swidth) {
+               if (!silent)
+                       xfs_notice(mp,
+"invalid stripe width (%lld) and stripe unit of 0", swidth);
+               return false;
+       }
+
+       if (sunit && (int)swidth % (int)sunit) {
+               if (!silent)
+                       xfs_notice(mp,
+"stripe width (%lld) must be a multiple of the stripe unit (%lld)",
+                                  swidth, sunit);
+               return false;
+       }
+       return true;
+}
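
The rewritten stripe checks collapse four separate sanity branches into a single parity test plus one shared byte-based validator. The expression "!!sbp->sb_unit ^ xfs_sb_version_hasdalign(sbp)" is true exactly when the stripe-unit field and the dalign feature bit disagree, which is the corrupt case. A minimal standalone sketch of that idiom, with made-up values (plain C, not the kernel code itself):

    #include <stdbool.h>
    #include <stdio.h>

    /* True when the stripe-unit field and the dalign feature bit agree. */
    static bool stripe_fields_consistent(unsigned int sb_unit, bool hasdalign)
    {
            /* !! normalises sb_unit to 0/1 before XOR-ing with the flag. */
            return !(!!sb_unit ^ hasdalign);
    }

    int main(void)
    {
            printf("%d\n", stripe_fields_consistent(4096, true));  /* 1: ok */
            printf("%d\n", stripe_fields_consistent(0, true));     /* 0: corrupt */
            printf("%d\n", stripe_fields_consistent(4096, false)); /* 0: corrupt */
            return 0;
    }
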
index 92465a9..f79f9dc 100644 (file)
@@ -42,4 +42,7 @@ extern int    xfs_sb_get_secondary(struct xfs_mount *mp,
                                struct xfs_trans *tp, xfs_agnumber_t agno,
                                struct xfs_buf **bpp);
 
+extern bool    xfs_validate_stripe_geometry(struct xfs_mount *mp,
+               __s64 sunit, __s64 swidth, int sectorsize, bool silent);
+
 #endif /* __XFS_SB_H__ */
index c795ae4..8c61a46 100644 (file)
@@ -62,7 +62,6 @@ void  xfs_log_get_max_trans_res(struct xfs_mount *mp,
 #define        XFS_TRANS_SB_DIRTY      0x02    /* superblock is modified */
 #define        XFS_TRANS_PERM_LOG_RES  0x04    /* xact took a permanent log res */
 #define        XFS_TRANS_SYNC          0x08    /* make commit synchronous */
-#define XFS_TRANS_DQ_DIRTY     0x10    /* at least one dquot in trx dirty */
 #define XFS_TRANS_RESERVE      0x20    /* OK to use reserved data blocks */
 #define XFS_TRANS_NO_WRITECOUNT 0x40   /* do not elevate SB writecount */
 #define XFS_TRANS_RES_FDBLKS   0x80    /* reserve newly freed blocks */
index 4f59554..b254fbe 100644 (file)
@@ -61,6 +61,29 @@ xfs_verify_fsbno(
        return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
 }
 
+/*
+ * Verify that a data device extent is fully contained inside the filesystem,
+ * does not cross an AG boundary, and does not point at static metadata.
+ */
+bool
+xfs_verify_fsbext(
+       struct xfs_mount        *mp,
+       xfs_fsblock_t           fsbno,
+       xfs_fsblock_t           len)
+{
+       if (fsbno + len <= fsbno)
+               return false;
+
+       if (!xfs_verify_fsbno(mp, fsbno))
+               return false;
+
+       if (!xfs_verify_fsbno(mp, fsbno + len - 1))
+               return false;
+
+       return  XFS_FSB_TO_AGNO(mp, fsbno) ==
+               XFS_FSB_TO_AGNO(mp, fsbno + len - 1);
+}
+
 /* Calculate the first and last possible inode number in an AG. */
 void
 xfs_agino_range(
@@ -175,6 +198,22 @@ xfs_verify_rtbno(
        return rtbno < mp->m_sb.sb_rblocks;
 }
 
+/* Verify that a realtime device extent is fully contained inside the volume. */
+bool
+xfs_verify_rtext(
+       struct xfs_mount        *mp,
+       xfs_rtblock_t           rtbno,
+       xfs_rtblock_t           len)
+{
+       if (rtbno + len <= rtbno)
+               return false;
+
+       if (!xfs_verify_rtbno(mp, rtbno))
+               return false;
+
+       return xfs_verify_rtbno(mp, rtbno + len - 1);
+}
+
 /* Calculate the range of valid icount values. */
 void
 xfs_icount_range(
@@ -219,3 +258,28 @@ xfs_verify_dablk(
 
        return dabno <= max_dablk;
 }
+
+/* Check that a file block offset does not exceed the maximum. */
+bool
+xfs_verify_fileoff(
+       struct xfs_mount        *mp,
+       xfs_fileoff_t           off)
+{
+       return off <= XFS_MAX_FILEOFF;
+}
+
+/* Check that a range of file block offsets does not exceed the maximum. */
+bool
+xfs_verify_fileext(
+       struct xfs_mount        *mp,
+       xfs_fileoff_t           off,
+       xfs_fileoff_t           len)
+{
+       if (off + len <= off)
+               return false;
+
+       if (!xfs_verify_fileoff(mp, off))
+               return false;
+
+       return xfs_verify_fileoff(mp, off + len - 1);
+}
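
All of the new extent verifiers above (xfs_verify_fsbext, xfs_verify_rtext, xfs_verify_fileext) open with the same overflow-safe guard: in unsigned arithmetic, "start + len <= start" holds exactly when len is zero or the addition wraps, so both degenerate cases are rejected before "start + len - 1" is ever computed. A self-contained sketch of the pattern (the block validator is a stand-in, not the kernel helper):

    #include <stdbool.h>
    #include <stdint.h>

    /* Stand-in for a per-device block validator such as xfs_verify_fsbno. */
    static bool blockno_valid(uint64_t bno, uint64_t nblocks)
    {
            return bno < nblocks;
    }

    /* Reject len == 0 and wraparound, then validate both ends of the extent. */
    static bool extent_valid(uint64_t start, uint64_t len, uint64_t nblocks)
    {
            if (start + len <= start)
                    return false;
            return blockno_valid(start, nblocks) &&
                   blockno_valid(start + len - 1, nblocks);
    }
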
index 397d947..064bd6e 100644 (file)
@@ -184,6 +184,8 @@ xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno);
 bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno,
                xfs_agblock_t agbno);
 bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
+bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno,
+               xfs_fsblock_t len);
 
 void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
                xfs_agino_t *first, xfs_agino_t *last);
@@ -195,9 +197,14 @@ bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino);
 bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
 bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino);
 bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
+bool xfs_verify_rtext(struct xfs_mount *mp, xfs_rtblock_t rtbno,
+               xfs_rtblock_t len);
 bool xfs_verify_icount(struct xfs_mount *mp, unsigned long long icount);
 bool xfs_verify_dablk(struct xfs_mount *mp, xfs_fileoff_t off);
 void xfs_icount_range(struct xfs_mount *mp, unsigned long long *min,
                unsigned long long *max);
+bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off);
+bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off,
+               xfs_fileoff_t len);
 
 #endif /* __XFS_TYPES_H__ */
index 401f715..23690f8 100644 (file)
@@ -829,8 +829,6 @@ xrep_agi_calc_from_btrees(
 
                cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno,
                                XFS_BTNUM_FINO);
-               if (error)
-                       goto err;
                error = xfs_btree_count_blocks(cur, &blocks);
                if (error)
                        goto err;
index fed56d2..33559c3 100644 (file)
@@ -319,7 +319,6 @@ xchk_bmap_iextent(
        struct xfs_bmbt_irec    *irec)
 {
        struct xfs_mount        *mp = info->sc->mp;
-       xfs_filblks_t           end;
        int                     error = 0;
 
        /*
@@ -330,6 +329,10 @@ xchk_bmap_iextent(
                xchk_fblock_set_corrupt(info->sc, info->whichfork,
                                irec->br_startoff);
 
+       if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
+               xchk_fblock_set_corrupt(info->sc, info->whichfork,
+                               irec->br_startoff);
+
        xchk_bmap_dirattr_extent(ip, info, irec);
 
        /* There should never be a "hole" extent in either extent list. */
@@ -349,20 +352,12 @@ xchk_bmap_iextent(
        if (irec->br_blockcount > MAXEXTLEN)
                xchk_fblock_set_corrupt(info->sc, info->whichfork,
                                irec->br_startoff);
-       if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
-               xchk_fblock_set_corrupt(info->sc, info->whichfork,
-                               irec->br_startoff);
-       end = irec->br_startblock + irec->br_blockcount - 1;
        if (info->is_rt &&
-           (!xfs_verify_rtbno(mp, irec->br_startblock) ||
-            !xfs_verify_rtbno(mp, end)))
+           !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount))
                xchk_fblock_set_corrupt(info->sc, info->whichfork,
                                irec->br_startoff);
        if (!info->is_rt &&
-           (!xfs_verify_fsbno(mp, irec->br_startblock) ||
-            !xfs_verify_fsbno(mp, end) ||
-            XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
-                               XFS_FSB_TO_AGNO(mp, end)))
+           !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
                xchk_fblock_set_corrupt(info->sc, info->whichfork,
                                irec->br_startoff);
 
@@ -563,10 +558,6 @@ xchk_bmap_check_ag_rmaps(
                return error;
 
        cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno);
-       if (!cur) {
-               error = -ENOMEM;
-               goto out_agf;
-       }
 
        sbcri.sc = sc;
        sbcri.whichfork = whichfork;
@@ -575,7 +566,6 @@ xchk_bmap_check_ag_rmaps(
                error = 0;
 
        xfs_btree_del_cursor(cur, error);
-out_agf:
        xfs_trans_brelse(sc->tp, agf);
        return error;
 }
index 1887605..8ea6d4a 100644 (file)
@@ -466,8 +466,6 @@ xchk_ag_btcur_init(
                /* Set up a bnobt cursor for cross-referencing. */
                sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
                                agno, XFS_BTNUM_BNO);
-               if (!sa->bno_cur)
-                       goto err;
        }
 
        if (sa->agf_bp &&
@@ -475,8 +473,6 @@ xchk_ag_btcur_init(
                /* Set up a cntbt cursor for cross-referencing. */
                sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
                                agno, XFS_BTNUM_CNT);
-               if (!sa->cnt_cur)
-                       goto err;
        }
 
        /* Set up a inobt cursor for cross-referencing. */
@@ -484,8 +480,6 @@ xchk_ag_btcur_init(
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
                sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
                                        agno, XFS_BTNUM_INO);
-               if (!sa->ino_cur)
-                       goto err;
        }
 
        /* Set up a finobt cursor for cross-referencing. */
@@ -493,8 +487,6 @@ xchk_ag_btcur_init(
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
                sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
                                agno, XFS_BTNUM_FINO);
-               if (!sa->fino_cur)
-                       goto err;
        }
 
        /* Set up a rmapbt cursor for cross-referencing. */
@@ -502,8 +494,6 @@ xchk_ag_btcur_init(
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
                sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
                                agno);
-               if (!sa->rmap_cur)
-                       goto err;
        }
 
        /* Set up a refcountbt cursor for cross-referencing. */
@@ -511,13 +501,9 @@ xchk_ag_btcur_init(
            xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
                sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
                                sa->agf_bp, agno);
-               if (!sa->refc_cur)
-                       goto err;
        }
 
        return 0;
-err:
-       return -ENOMEM;
 }
 
 /* Release the AG header context and btree cursors. */
index b045e95..178b345 100644 (file)
@@ -66,8 +66,18 @@ xchk_dir_check_ftype(
         * eofblocks cleanup (which allocates what would be a nested
         * transaction), we can't use DONTCACHE here because DONTCACHE
         * inodes can trigger immediate inactive cleanup of the inode.
+        *
+        * If _iget returns -EINVAL or -ENOENT then the child inode number is
+        * garbage and the directory is corrupt.  If _iget returns
+        * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt, which is a
+        * cross-referencing error.  Any other error is an operational error.
         */
        error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
+       if (error == -EINVAL || error == -ENOENT) {
+               error = -EFSCORRUPTED;
+               xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, 0, &error);
+               goto out;
+       }
        if (!xchk_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset,
                        &error))
                goto out;
@@ -105,6 +115,7 @@ xchk_dir_actor(
        struct xfs_name         xname;
        xfs_ino_t               lookup_ino;
        xfs_dablk_t             offset;
+       bool                    checked_ftype = false;
        int                     error = 0;
 
        sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter);
@@ -133,6 +144,7 @@ xchk_dir_actor(
                if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
                        xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
                                        offset);
+               checked_ftype = true;
                if (ino != ip->i_ino)
                        xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
                                        offset);
@@ -144,6 +156,7 @@ xchk_dir_actor(
                if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
                        xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
                                        offset);
+               checked_ftype = true;
                if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino)
                        xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
                                        offset);
@@ -167,9 +180,11 @@ xchk_dir_actor(
        }
 
        /* Verify the file type.  This function absorbs error codes. */
-       error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type);
-       if (error)
-               goto out;
+       if (!checked_ftype) {
+               error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type);
+               if (error)
+                       goto out;
+       }
 out:
        /*
         * A negative error code returned here is supposed to cause the
index bb25ff1..faf65eb 100644 (file)
@@ -185,10 +185,6 @@ xchk_inode_flags2(
        if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK))
                goto bad;
 
-       /* dax and reflink make no sense, currently */
-       if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
-               goto bad;
-
        /* no bigtime iflag without the bigtime feature */
        if (xfs_dinode_has_bigtime(dip) &&
            !xfs_sb_version_hasbigtime(&mp->m_sb))
index 855aa8b..66c35f6 100644 (file)
@@ -164,13 +164,13 @@ xchk_parent_validate(
         * can't use DONTCACHE here because DONTCACHE inodes can trigger
         * immediate inactive cleanup of the inode.
         *
-        * If _iget returns -EINVAL then the parent inode number is garbage
-        * and the directory is corrupt.  If the _iget returns -EFSCORRUPTED
-        * or -EFSBADCRC then the parent is corrupt which is a cross
-        * referencing error.  Any other error is an operational error.
+        * If _iget returns -EINVAL or -ENOENT then the parent inode number is
+        * garbage and the directory is corrupt.  If _iget returns
+        * -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt, which is a
+        * cross-referencing error.  Any other error is an operational error.
         */
        error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp);
-       if (error == -EINVAL) {
+       if (error == -EINVAL || error == -ENOENT) {
                error = -EFSCORRUPTED;
                xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
                goto out;
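
Both the directory and parent scrubbers now fold -ENOENT into the same bucket as -EINVAL: either return from xfs_iget means the inode number stored in the directory entry was garbage, which is corruption of the directory itself rather than an operational failure. A tiny sketch of that classification (EFSCORRUPTED is defined here the way the kernel defines it, for illustration only):

    #include <errno.h>

    #ifndef EFSCORRUPTED
    #define EFSCORRUPTED    EUCLEAN /* matches the kernel's definition */
    #endif

    /* Map an inode-lookup error to a scrub verdict. */
    static int classify_iget_error(int error)
    {
            switch (error) {
            case -EINVAL:
            case -ENOENT:
                    /* The stored inode number was garbage: corruption. */
                    return -EFSCORRUPTED;
            default:
                    /* -EFSCORRUPTED, -EFSBADCRC and the rest pass through. */
                    return error;
            }
    }
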
index 76e4ffe..d409ca5 100644 (file)
@@ -52,9 +52,7 @@ xchk_rtbitmap_rec(
        startblock = rec->ar_startext * tp->t_mountp->m_sb.sb_rextsize;
        blockcount = rec->ar_extcount * tp->t_mountp->m_sb.sb_rextsize;
 
-       if (startblock + blockcount <= startblock ||
-           !xfs_verify_rtbno(sc->mp, startblock) ||
-           !xfs_verify_rtbno(sc->mp, startblock + blockcount - 1))
+       if (!xfs_verify_rtext(sc->mp, startblock, blockcount))
                xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
        return 0;
 }
index c544951..779cb73 100644 (file)
@@ -16,6 +16,7 @@
 #include "xfs_acl.h"
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
+#include "xfs_trans.h"
 
 #include <linux/posix_acl_xattr.h>
 
@@ -212,21 +213,28 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 }
 
 static int
-xfs_set_mode(struct inode *inode, umode_t mode)
+xfs_acl_set_mode(
+       struct inode            *inode,
+       umode_t                 mode)
 {
-       int error = 0;
-
-       if (mode != inode->i_mode) {
-               struct iattr iattr;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
 
-               iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
-               iattr.ia_mode = mode;
-               iattr.ia_ctime = current_time(inode);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+       if (error)
+               return error;
 
-               error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
-       }
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+       inode->i_mode = mode;
+       inode->i_ctime = current_time(inode);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-       return error;
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+       return xfs_trans_commit(tp);
 }
 
 int
@@ -251,18 +259,14 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
        }
 
  set_acl:
-       error =  __xfs_set_acl(inode, acl, type);
-       if (error)
-               return error;
-
        /*
         * We set the mode after successfully updating the ACL xattr because the
         * xattr update can fail at ENOSPC and we don't want to change the mode
         * if the ACL update hasn't been applied.
         */
-       if (set_mode)
-               error = xfs_set_mode(inode, mode);
-
+       error = __xfs_set_acl(inode, acl, type);
+       if (!error && set_mode && mode != inode->i_mode)
+               error = xfs_acl_set_mode(inode, mode);
        return error;
 }
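
Two things change at once in the ACL path: the mode update becomes its own logged transaction (xfs_acl_set_mode) instead of going through the generic setattr path, and it is ordered strictly after the xattr write, so an ENOSPC failure while storing the ACL can never leave a changed mode behind. A toy model of just the ordering rule (all names and types are hypothetical; only the control flow mirrors the patch):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct toy_inode { unsigned int mode; };

    /* Stub: pretend the ACL xattr write can fail with -ENOSPC. */
    static int store_acl_xattr(struct toy_inode *inode, bool fail)
    {
            (void)inode;
            return fail ? -ENOSPC : 0;
    }

    /* Stub for the dedicated logged mode-update transaction. */
    static int commit_mode_update(struct toy_inode *inode, unsigned int mode)
    {
            inode->mode = mode;
            return 0;
    }

    /* ACL first; mode only after the xattr update has succeeded. */
    static int set_acl_then_mode(struct toy_inode *inode, bool xattr_fails,
                                 bool set_mode, unsigned int mode)
    {
            int error = store_acl_xattr(inode, xattr_fails);

            if (!error && set_mode && mode != inode->mode)
                    error = commit_mode_update(inode, mode);
            return error;
    }

    int main(void)
    {
            struct toy_inode inode = { .mode = 0644 };

            /* The xattr write fails: the mode must stay untouched. */
            set_acl_then_mode(&inode, true, true, 0600);
            printf("%o\n", inode.mode); /* prints 644 */
            return 0;
    }
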
 
index 9e16a4d..93e4d8a 100644 (file)
@@ -417,6 +417,40 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
        .cancel_item    = xfs_bmap_update_cancel_item,
 };
 
+/* Is this recovered BUI ok? */
+static inline bool
+xfs_bui_validate(
+       struct xfs_mount                *mp,
+       struct xfs_bui_log_item         *buip)
+{
+       struct xfs_map_extent           *bmap;
+
+       /* Only one mapping operation per BUI... */
+       if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
+               return false;
+
+       bmap = &buip->bui_format.bui_extents[0];
+
+       if (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)
+               return false;
+
+       switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+       case XFS_BMAP_MAP:
+       case XFS_BMAP_UNMAP:
+               break;
+       default:
+               return false;
+       }
+
+       if (!xfs_verify_ino(mp, bmap->me_owner))
+               return false;
+
+       if (!xfs_verify_fileext(mp, bmap->me_startoff, bmap->me_len))
+               return false;
+
+       return xfs_verify_fsbext(mp, bmap->me_startblock, bmap->me_len);
+}
+
 /*
  * Process a bmap update intent item that was recovered from the log.
  * We need to update some inode's bmbt.
@@ -433,47 +467,24 @@ xfs_bui_item_recover(
        struct xfs_mount                *mp = lip->li_mountp;
        struct xfs_map_extent           *bmap;
        struct xfs_bud_log_item         *budp;
-       xfs_fsblock_t                   startblock_fsb;
-       xfs_fsblock_t                   inode_fsb;
        xfs_filblks_t                   count;
        xfs_exntst_t                    state;
        unsigned int                    bui_type;
        int                             whichfork;
        int                             error = 0;
 
-       /* Only one mapping operation per BUI... */
-       if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
+       if (!xfs_bui_validate(mp, buip)) {
+               XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+                               &buip->bui_format, sizeof(buip->bui_format));
                return -EFSCORRUPTED;
+       }
 
-       /*
-        * First check the validity of the extent described by the
-        * BUI.  If anything is bad, then toss the BUI.
-        */
        bmap = &buip->bui_format.bui_extents[0];
-       startblock_fsb = XFS_BB_TO_FSB(mp,
-                          XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
-       inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
-                       XFS_INO_TO_FSB(mp, bmap->me_owner)));
        state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
                        XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
        whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
                        XFS_ATTR_FORK : XFS_DATA_FORK;
        bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
-       switch (bui_type) {
-       case XFS_BMAP_MAP:
-       case XFS_BMAP_UNMAP:
-               break;
-       default:
-               return -EFSCORRUPTED;
-       }
-       if (startblock_fsb == 0 ||
-           bmap->me_len == 0 ||
-           inode_fsb == 0 ||
-           startblock_fsb >= mp->m_sb.sb_dblocks ||
-           bmap->me_len >= mp->m_sb.sb_agblocks ||
-           inode_fsb >= mp->m_sb.sb_dblocks ||
-           (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
-               return -EFSCORRUPTED;
 
        /* Grab the inode. */
        error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
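
This gives BUI recovery the shape shared by all four intent types touched in this series: a side-effect-free boolean validator over the logged format, plus a single caller that reports the whole format buffer and returns -EFSCORRUPTED when validation fails, instead of open-coding field checks inline. A condensed sketch of that shape (hypothetical types; report_corruption stands in for XFS_CORRUPTION_ERROR):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    #ifndef EFSCORRUPTED
    #define EFSCORRUPTED    EUCLEAN /* matches the kernel's definition */
    #endif

    struct intent_format {
            unsigned int    nextents;
            unsigned int    flags;
    };

    /* Stand-in for XFS_CORRUPTION_ERROR: report, take no other action. */
    static void report_corruption(const void *buf, size_t len)
    {
            (void)buf;
            fprintf(stderr, "corrupt intent item (%zu bytes)\n", len);
    }

    /* Pure predicate: no recovery work happens before this passes. */
    static bool intent_validate(const struct intent_format *fmt)
    {
            return fmt->nextents == 1; /* e.g. one mapping per BUI */
    }

    static int intent_recover(const struct intent_format *fmt)
    {
            if (!intent_validate(fmt)) {
                    report_corruption(fmt, sizeof(*fmt));
                    return -EFSCORRUPTED;
            }
            /* ... replay the logged operation here ... */
            return 0;
    }
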
index 4e4cf91..f8400bb 100644 (file)
@@ -278,7 +278,7 @@ _xfs_buf_alloc(
  */
 STATIC int
 _xfs_buf_get_pages(
-       xfs_buf_t               *bp,
+       struct xfs_buf          *bp,
        int                     page_count)
 {
        /* Make sure that we have a page list */
@@ -302,7 +302,7 @@ _xfs_buf_get_pages(
  */
 STATIC void
 _xfs_buf_free_pages(
-       xfs_buf_t       *bp)
+       struct xfs_buf  *bp)
 {
        if (bp->b_pages != bp->b_page_array) {
                kmem_free(bp->b_pages);
@@ -319,7 +319,7 @@ _xfs_buf_free_pages(
  */
 static void
 xfs_buf_free(
-       xfs_buf_t               *bp)
+       struct xfs_buf          *bp)
 {
        trace_xfs_buf_free(bp, _RET_IP_);
 
@@ -352,7 +352,7 @@ xfs_buf_free(
  */
 STATIC int
 xfs_buf_allocate_memory(
-       xfs_buf_t               *bp,
+       struct xfs_buf          *bp,
        uint                    flags)
 {
        size_t                  size;
@@ -463,7 +463,7 @@ out_free_pages:
  */
 STATIC int
 _xfs_buf_map_pages(
-       xfs_buf_t               *bp,
+       struct xfs_buf          *bp,
        uint                    flags)
 {
        ASSERT(bp->b_flags & _XBF_PAGES);
@@ -590,7 +590,7 @@ xfs_buf_find(
        struct xfs_buf          **found_bp)
 {
        struct xfs_perag        *pag;
-       xfs_buf_t               *bp;
+       struct xfs_buf          *bp;
        struct xfs_buf_map      cmap = { .bm_bn = map[0].bm_bn };
        xfs_daddr_t             eofs;
        int                     i;
@@ -762,7 +762,7 @@ found:
 
 int
 _xfs_buf_read(
-       xfs_buf_t               *bp,
+       struct xfs_buf          *bp,
        xfs_buf_flags_t         flags)
 {
        ASSERT(!(flags & XBF_WRITE));
@@ -1005,7 +1005,7 @@ xfs_buf_get_uncached(
  */
 void
 xfs_buf_hold(
-       xfs_buf_t               *bp)
+       struct xfs_buf          *bp)
 {
        trace_xfs_buf_hold(bp, _RET_IP_);
        atomic_inc(&bp->b_hold);
@@ -1017,7 +1017,7 @@ xfs_buf_hold(
  */
 void
 xfs_buf_rele(
-       xfs_buf_t               *bp)
+       struct xfs_buf          *bp)
 {
        struct xfs_perag        *pag = bp->b_pag;
        bool                    release;
@@ -1161,7 +1161,7 @@ xfs_buf_unlock(
 
 STATIC void
 xfs_buf_wait_unpin(
-       xfs_buf_t               *bp)
+       struct xfs_buf          *bp)
 {
        DECLARE_WAITQUEUE       (wait, current);
 
@@ -1373,7 +1373,7 @@ xfs_buf_ioend_work(
        struct work_struct      *work)
 {
        struct xfs_buf          *bp =
-               container_of(work, xfs_buf_t, b_ioend_work);
+               container_of(work, struct xfs_buf, b_ioend_work);
 
        xfs_buf_ioend(bp);
 }
@@ -1388,7 +1388,7 @@ xfs_buf_ioend_async(
 
 void
 __xfs_buf_ioerror(
-       xfs_buf_t               *bp,
+       struct xfs_buf          *bp,
        int                     error,
        xfs_failaddr_t          failaddr)
 {
index bfd2907..5d91a31 100644 (file)
@@ -124,7 +124,7 @@ struct xfs_buf_ops {
        xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp);
 };
 
-typedef struct xfs_buf {
+struct xfs_buf {
        /*
         * first cacheline holds all the fields needed for an uncontended cache
         * hit to be fully processed. The semaphore straddles the cacheline
@@ -190,7 +190,7 @@ typedef struct xfs_buf {
        int                     b_last_error;
 
        const struct xfs_buf_ops        *b_ops;
-} xfs_buf_t;
+};
 
 /* Finding and Reading Buffers */
 struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
@@ -253,16 +253,16 @@ int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags);
 void xfs_buf_hold(struct xfs_buf *bp);
 
 /* Releasing Buffers */
-extern void xfs_buf_rele(xfs_buf_t *);
+extern void xfs_buf_rele(struct xfs_buf *);
 
 /* Locking and Unlocking Buffers */
-extern int xfs_buf_trylock(xfs_buf_t *);
-extern void xfs_buf_lock(xfs_buf_t *);
-extern void xfs_buf_unlock(xfs_buf_t *);
+extern int xfs_buf_trylock(struct xfs_buf *);
+extern void xfs_buf_lock(struct xfs_buf *);
+extern void xfs_buf_unlock(struct xfs_buf *);
 #define xfs_buf_islocked(bp) \
        ((bp)->b_sema.count <= 0)
 
-static inline void xfs_buf_relse(xfs_buf_t *bp)
+static inline void xfs_buf_relse(struct xfs_buf *bp)
 {
        xfs_buf_unlock(bp);
        xfs_buf_rele(bp);
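
The xfs_buf_t conversion running through these files is purely mechanical: kernel style prefers referring to structures by their struct tag, so the typedef is deleted and every declaration spells out "struct xfs_buf". In miniature (illustrative names, not the real structure):

    /* Old style, removed by this series: */
    typedef struct buf_old { int b_flags; } buf_old_t;

    /* Preferred style: the struct tag is the only name. */
    struct buf_new { int b_flags; };

    static void buf_new_init(struct buf_new *bp)
    {
            bp->b_flags = 0;
    }
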
index 0356f2e..dc0be2a 100644 (file)
@@ -412,7 +412,7 @@ xfs_buf_item_unpin(
        int                     remove)
 {
        struct xfs_buf_log_item *bip = BUF_ITEM(lip);
-       xfs_buf_t               *bp = bip->bli_buf;
+       struct xfs_buf          *bp = bip->bli_buf;
        int                     stale = bip->bli_flags & XFS_BLI_STALE;
        int                     freed;
 
@@ -942,7 +942,7 @@ xfs_buf_item_free(
  */
 void
 xfs_buf_item_relse(
-       xfs_buf_t       *bp)
+       struct xfs_buf  *bp)
 {
        struct xfs_buf_log_item *bip = bp->b_log_item;
 
index 6c11bfc..93223eb 100644 (file)
@@ -578,6 +578,15 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
        .cancel_item    = xfs_extent_free_cancel_item,
 };
 
+/* Is this recovered EFI ok? */
+static inline bool
+xfs_efi_validate_ext(
+       struct xfs_mount                *mp,
+       struct xfs_extent               *extp)
+{
+       return xfs_verify_fsbext(mp, extp->ext_start, extp->ext_len);
+}
+
 /*
  * Process an extent free intent item that was recovered from
  * the log.  We need to free the extents that it describes.
@@ -592,7 +601,6 @@ xfs_efi_item_recover(
        struct xfs_efd_log_item         *efdp;
        struct xfs_trans                *tp;
        struct xfs_extent               *extp;
-       xfs_fsblock_t                   startblock_fsb;
        int                             i;
        int                             error = 0;
 
@@ -602,14 +610,13 @@ xfs_efi_item_recover(
         * just toss the EFI.
         */
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
-               extp = &efip->efi_format.efi_extents[i];
-               startblock_fsb = XFS_BB_TO_FSB(mp,
-                                  XFS_FSB_TO_DADDR(mp, extp->ext_start));
-               if (startblock_fsb == 0 ||
-                   extp->ext_len == 0 ||
-                   startblock_fsb >= mp->m_sb.sb_dblocks ||
-                   extp->ext_len >= mp->m_sb.sb_agblocks)
+               if (!xfs_efi_validate_ext(mp,
+                                       &efip->efi_format.efi_extents[i])) {
+                       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+                                       &efip->efi_format,
+                                       sizeof(efip->efi_format));
                        return -EFSCORRUPTED;
+               }
        }
 
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
index b7c5783..959ce91 100644 (file)
@@ -28,7 +28,7 @@ xfs_growfs_data_private(
        xfs_mount_t             *mp,            /* mount point for filesystem */
        xfs_growfs_data_t       *in)            /* growfs data input struct */
 {
-       xfs_buf_t               *bp;
+       struct xfs_buf          *bp;
        int                     error;
        xfs_agnumber_t          nagcount;
        xfs_agnumber_t          nagimax = 0;
index 2bfbcf2..b7352bc 100644 (file)
@@ -761,68 +761,26 @@ xfs_inode_inherit_flags2(
 }
 
 /*
- * Allocate an inode on disk and return a copy of its in-core version.
- * The in-core inode is locked exclusively.  Set mode, nlink, and rdev
- * appropriately within the inode.  The uid and gid for the inode are
- * set according to the contents of the given cred structure.
- *
- * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
- * has a free inode available, call xfs_iget() to obtain the in-core
- * version of the allocated inode.  Finally, fill in the inode and
- * log its initial contents.  In this case, ialloc_context would be
- * set to NULL.
- *
- * If xfs_dialloc() does not have an available inode, it will replenish
- * its supply by doing an allocation. Since we can only do one
- * allocation within a transaction without deadlocks, we must commit
- * the current transaction before returning the inode itself.
- * In this case, therefore, we will set ialloc_context and return.
- * The caller should then commit the current transaction, start a new
- * transaction, and call xfs_ialloc() again to actually get the inode.
- *
- * To ensure that some other process does not grab the inode that
- * was allocated during the first call to xfs_ialloc(), this routine
- * also returns the [locked] bp pointing to the head of the freelist
- * as ialloc_context.  The caller should hold this buffer across
- * the commit and pass it back into this routine on the second call.
- *
- * If we are allocating quota inodes, we do not have a parent inode
- * to attach to or associate with (i.e. pip == NULL) because they
- * are not linked into the directory structure - they are attached
- * directly to the superblock - and so have no parent.
+ * Initialise a newly allocated inode and return the in-core inode to the
+ * caller locked exclusively.
  */
 static int
-xfs_ialloc(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *pip,
-       umode_t         mode,
-       xfs_nlink_t     nlink,
-       dev_t           rdev,
-       prid_t          prid,
-       xfs_buf_t       **ialloc_context,
-       xfs_inode_t     **ipp)
-{
-       struct xfs_mount *mp = tp->t_mountp;
-       xfs_ino_t       ino;
-       xfs_inode_t     *ip;
-       uint            flags;
-       int             error;
-       struct timespec64 tv;
-       struct inode    *inode;
-
-       /*
-        * Call the space management code to pick
-        * the on-disk inode to be allocated.
-        */
-       error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
-                           ialloc_context, &ino);
-       if (error)
-               return error;
-       if (*ialloc_context || ino == NULLFSINO) {
-               *ipp = NULL;
-               return 0;
-       }
-       ASSERT(*ialloc_context == NULL);
+xfs_init_new_inode(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *pip,
+       xfs_ino_t               ino,
+       umode_t                 mode,
+       xfs_nlink_t             nlink,
+       dev_t                   rdev,
+       prid_t                  prid,
+       struct xfs_inode        **ipp)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_inode        *ip;
+       unsigned int            flags;
+       int                     error;
+       struct timespec64       tv;
+       struct inode            *inode;
 
        /*
         * Protect against obviously corrupt allocation btree records. Later
@@ -837,14 +795,13 @@ xfs_ialloc(
        }
 
        /*
-        * Get the in-core inode with the lock held exclusively.
-        * This is because we're setting fields here we need
-        * to prevent others from looking at until we're done.
+        * Get the in-core inode with the lock held exclusively to prevent
+        * others from looking at until we're done.
         */
-       error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
-                        XFS_ILOCK_EXCL, &ip);
+       error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
        if (error)
                return error;
+
        ASSERT(ip != NULL);
        inode = VFS_I(ip);
        inode->i_mode = mode;
@@ -932,143 +889,51 @@ xfs_ialloc(
 }
 
 /*
- * Allocates a new inode from disk and return a pointer to the
- * incore copy. This routine will internally commit the current
- * transaction and allocate a new one if the Space Manager needed
- * to do an allocation to replenish the inode free-list.
- *
- * This routine is designed to be called from xfs_create and
- * xfs_create_dir.
+ * Allocates a new inode from disk and returns a pointer to the incore copy.
+ * This routine will internally commit the current transaction and allocate a
+ * new one if we need to allocate more on-disk free inodes to perform the
+ * requested operation.
  *
+ * If we are allocating quota inodes, we do not have a parent inode to attach to
+ * or associate with (i.e. dp == NULL) because they are not linked into the
+ * directory structure - they are attached directly to the superblock - and so
+ * have no parent.
  */
 int
 xfs_dir_ialloc(
-       xfs_trans_t     **tpp,          /* input: current transaction;
-                                          output: may be a new transaction. */
-       xfs_inode_t     *dp,            /* directory within whose allocate
-                                          the inode. */
-       umode_t         mode,
-       xfs_nlink_t     nlink,
-       dev_t           rdev,
-       prid_t          prid,           /* project id */
-       xfs_inode_t     **ipp)          /* pointer to inode; it will be
-                                          locked. */
-{
-       xfs_trans_t     *tp;
-       xfs_inode_t     *ip;
-       xfs_buf_t       *ialloc_context = NULL;
-       int             code;
-       void            *dqinfo;
-       uint            tflags;
-
-       tp = *tpp;
-       ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+       struct xfs_trans        **tpp,
+       struct xfs_inode        *dp,
+       umode_t                 mode,
+       xfs_nlink_t             nlink,
+       dev_t                   rdev,
+       prid_t                  prid,
+       struct xfs_inode        **ipp)
+{
+       struct xfs_buf          *agibp;
+       xfs_ino_t               parent_ino = dp ? dp->i_ino : 0;
+       xfs_ino_t               ino;
+       int                     error;
 
-       /*
-        * xfs_ialloc will return a pointer to an incore inode if
-        * the Space Manager has an available inode on the free
-        * list. Otherwise, it will do an allocation and replenish
-        * the freelist.  Since we can only do one allocation per
-        * transaction without deadlocks, we will need to commit the
-        * current transaction and start a new one.  We will then
-        * need to call xfs_ialloc again to get the inode.
-        *
-        * If xfs_ialloc did an allocation to replenish the freelist,
-        * it returns the bp containing the head of the freelist as
-        * ialloc_context. We will hold a lock on it across the
-        * transaction commit so that no other process can steal
-        * the inode(s) that we've just allocated.
-        */
-       code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
-                       &ip);
+       ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
 
        /*
-        * Return an error if we were unable to allocate a new inode.
-        * This should only happen if we run out of space on disk or
-        * encounter a disk error.
+        * Call the space management code to pick the on-disk inode to be
+        * allocated.
         */
-       if (code) {
-               *ipp = NULL;
-               return code;
-       }
-       if (!ialloc_context && !ip) {
-               *ipp = NULL;
-               return -ENOSPC;
-       }
-
-       /*
-        * If the AGI buffer is non-NULL, then we were unable to get an
-        * inode in one operation.  We need to commit the current
-        * transaction and call xfs_ialloc() again.  It is guaranteed
-        * to succeed the second time.
-        */
-       if (ialloc_context) {
-               /*
-                * Normally, xfs_trans_commit releases all the locks.
-                * We call bhold to hang on to the ialloc_context across
-                * the commit.  Holding this buffer prevents any other
-                * processes from doing any allocations in this
-                * allocation group.
-                */
-               xfs_trans_bhold(tp, ialloc_context);
-
-               /*
-                * We want the quota changes to be associated with the next
-                * transaction, NOT this one. So, detach the dqinfo from this
-                * and attach it to the next transaction.
-                */
-               dqinfo = NULL;
-               tflags = 0;
-               if (tp->t_dqinfo) {
-                       dqinfo = (void *)tp->t_dqinfo;
-                       tp->t_dqinfo = NULL;
-                       tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY;
-                       tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
-               }
-
-               code = xfs_trans_roll(&tp);
-
-               /*
-                * Re-attach the quota info that we detached from prev trx.
-                */
-               if (dqinfo) {
-                       tp->t_dqinfo = dqinfo;
-                       tp->t_flags |= tflags;
-               }
-
-               if (code) {
-                       xfs_buf_relse(ialloc_context);
-                       *tpp = tp;
-                       *ipp = NULL;
-                       return code;
-               }
-               xfs_trans_bjoin(tp, ialloc_context);
-
-               /*
-                * Call ialloc again. Since we've locked out all
-                * other allocations in this allocation group,
-                * this call should always succeed.
-                */
-               code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
-                                 &ialloc_context, &ip);
-
-               /*
-                * If we get an error at this point, return to the caller
-                * so that the current transaction can be aborted.
-                */
-               if (code) {
-                       *tpp = tp;
-                       *ipp = NULL;
-                       return code;
-               }
-               ASSERT(!ialloc_context && ip);
+       error = xfs_dialloc_select_ag(tpp, parent_ino, mode, &agibp);
+       if (error)
+               return error;
 
-       }
+       if (!agibp)
+               return -ENOSPC;
 
-       *ipp = ip;
-       *tpp = tp;
+       /* Allocate an inode from the selected AG */
+       error = xfs_dialloc_ag(*tpp, agibp, parent_ino, &ino);
+       if (error)
+               return error;
+       ASSERT(ino != NULLFSINO);
 
-       return 0;
+       return xfs_init_new_inode(*tpp, dp, ino, mode, nlink, rdev, prid, ipp);
 }
 
 /*
@@ -1521,7 +1386,7 @@ xfs_itruncate_extents_flags(
         * the page cache can't scale that far.
         */
        first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
-       if (first_unmap_block >= XFS_MAX_FILEOFF) {
+       if (!xfs_verify_fileoff(mp, first_unmap_block)) {
                WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
                return 0;
        }
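
The replacement collapses the old commit-and-retry dance into a linear three-step sequence: select an AG with free inodes (which may roll the transaction internally to replenish them), allocate an inode number from that AG, then initialise the incore inode. A toy model of the control flow (every name here is a stand-in; only the step ordering mirrors the patch):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Step 1: pick an AG; may roll the transaction behind the scenes. */
    static int select_ag(int *txn, uint64_t parent, bool *found)
    {
            (void)txn; (void)parent;
            *found = true; /* stub: always finds an AG */
            return 0;
    }

    /* Step 2: allocate an inode number from the selected AG. */
    static int alloc_ino(uint64_t parent, uint64_t *ino)
    {
            *ino = parent + 1; /* stub */
            return 0;
    }

    /* Step 3: initialise and return the locked incore inode. */
    static int init_new_inode(uint64_t ino)
    {
            printf("initialised inode %llu\n", (unsigned long long)ino);
            return 0;
    }

    /* The patched flow: linear, with no commit-and-retry loop. */
    static int dir_ialloc(int *txn, uint64_t parent)
    {
            bool found;
            uint64_t ino;
            int error = select_ag(txn, parent, &found);

            if (error)
                    return error;
            if (!found)
                    return -ENOSPC; /* no AG could satisfy the request */
            error = alloc_ino(parent, &ino);
            if (error)
                    return error;
            return init_new_inode(ino);
    }

    int main(void)
    {
            int txn = 0;
            return dir_ialloc(&txn, 128);
    }
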
index 751a3d1..eca333f 100644 (file)
@@ -407,9 +407,9 @@ void                xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
 xfs_extlen_t   xfs_get_extsz_hint(struct xfs_inode *ip);
 xfs_extlen_t   xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
-int            xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
-                              xfs_nlink_t, dev_t, prid_t,
-                              struct xfs_inode **);
+int xfs_dir_ialloc(struct xfs_trans **tpp, struct xfs_inode *dp, umode_t mode,
+                  xfs_nlink_t nlink, dev_t dev, prid_t prid,
+                  struct xfs_inode **ipp);
 
 static inline int
 xfs_itruncate_extents(
index 1414ab7..67c8dc9 100644 (file)
@@ -206,10 +206,8 @@ xfs_generic_create(
        xfs_finish_inode_setup(ip);
 
  out_free_acl:
-       if (default_acl)
-               posix_acl_release(default_acl);
-       if (acl)
-               posix_acl_release(acl);
+       posix_acl_release(default_acl);
+       posix_acl_release(acl);
        return error;
 
  out_cleanup_inode:
@@ -648,11 +646,10 @@ xfs_vn_change_ok(
  * Caution: The caller of this function is responsible for calling
  * setattr_prepare() or otherwise verifying the change is fine.
  */
-int
+static int
 xfs_setattr_nonsize(
        struct xfs_inode        *ip,
-       struct iattr            *iattr,
-       int                     flags)
+       struct iattr            *iattr)
 {
        xfs_mount_t             *mp = ip->i_mount;
        struct inode            *inode = VFS_I(ip);
@@ -809,7 +806,7 @@ xfs_setattr_nonsize(
         *           to attr_set.  No previous user of the generic
         *           Posix ACL code seems to care about this issue either.
         */
-       if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+       if (mask & ATTR_MODE) {
                error = posix_acl_chmod(inode, inode->i_mode);
                if (error)
                        return error;
@@ -826,22 +823,6 @@ out_dqrele:
        return error;
 }
 
-int
-xfs_vn_setattr_nonsize(
-       struct dentry           *dentry,
-       struct iattr            *iattr)
-{
-       struct xfs_inode        *ip = XFS_I(d_inode(dentry));
-       int error;
-
-       trace_xfs_setattr(ip);
-
-       error = xfs_vn_change_ok(dentry, iattr);
-       if (error)
-               return error;
-       return xfs_setattr_nonsize(ip, iattr, 0);
-}
-
 /*
  * Truncate file.  Must have write permission and not be a directory.
  *
@@ -881,7 +862,7 @@ xfs_setattr_size(
                 * Use the regular setattr path to update the timestamps.
                 */
                iattr->ia_valid &= ~ATTR_SIZE;
-               return xfs_setattr_nonsize(ip, iattr, 0);
+               return xfs_setattr_nonsize(ip, iattr);
        }
 
        /*
@@ -1069,11 +1050,11 @@ xfs_vn_setattr(
        struct dentry           *dentry,
        struct iattr            *iattr)
 {
+       struct inode            *inode = d_inode(dentry);
+       struct xfs_inode        *ip = XFS_I(inode);
        int                     error;
 
        if (iattr->ia_valid & ATTR_SIZE) {
-               struct inode            *inode = d_inode(dentry);
-               struct xfs_inode        *ip = XFS_I(inode);
                uint                    iolock;
 
                xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
@@ -1088,7 +1069,11 @@ xfs_vn_setattr(
                error = xfs_vn_setattr_size(dentry, iattr);
                xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
        } else {
-               error = xfs_vn_setattr_nonsize(dentry, iattr);
+               trace_xfs_setattr(ip);
+
+               error = xfs_vn_change_ok(dentry, iattr);
+               if (!error)
+                       error = xfs_setattr_nonsize(ip, iattr);
        }
 
        return error;
index 4d24ff3..99ca745 100644 (file)
@@ -13,15 +13,7 @@ extern const struct file_operations xfs_dir_file_operations;
 
 extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
 
-/*
- * Internal setattr interfaces.
- */
-#define XFS_ATTR_NOACL         0x01    /* Don't call posix_acl_chmod */
-
 extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr);
-extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap,
-                              int flags);
-extern int xfs_vn_setattr_nonsize(struct dentry *dentry, struct iattr *vap);
 extern int xfs_vn_setattr_size(struct dentry *dentry, struct iattr *vap);
 
 #endif /* __XFS_IOPS_H__ */
index 2a45138..eae3aff 100644 (file)
@@ -363,7 +363,7 @@ xfs_iwalk_run_callbacks(
        /* Delete cursor but remember the last record we cached... */
        xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
        irec = &iwag->recs[iwag->nr_recs - 1];
-       ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK);
+       ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK);
 
        error = xfs_iwalk_ag_recs(iwag);
        if (error)
index 87886b7..97f3130 100644 (file)
@@ -2559,8 +2559,11 @@ xlog_recover_process_intents(
                spin_unlock(&ailp->ail_lock);
                error = lip->li_ops->iop_recover(lip, &capture_list);
                spin_lock(&ailp->ail_lock);
-               if (error)
+               if (error) {
+                       trace_xlog_intent_recovery_failed(log->l_mp, error,
+                                       lip->li_ops->iop_recover);
                        break;
+               }
        }
 
        xfs_trans_ail_cursor_done(&cur);
@@ -2628,7 +2631,7 @@ xlog_recover_clear_agi_bucket(
 {
        xfs_trans_t     *tp;
        xfs_agi_t       *agi;
-       xfs_buf_t       *agibp;
+       struct xfs_buf  *agibp;
        int             offset;
        int             error;
 
@@ -2746,7 +2749,7 @@ xlog_recover_process_iunlinks(
        xfs_mount_t     *mp;
        xfs_agnumber_t  agno;
        xfs_agi_t       *agi;
-       xfs_buf_t       *agibp;
+       struct xfs_buf  *agibp;
        xfs_agino_t     agino;
        int             bucket;
        int             error;
@@ -3498,8 +3501,8 @@ xlog_recover_check_summary(
        struct xlog     *log)
 {
        xfs_mount_t     *mp;
-       xfs_buf_t       *agfbp;
-       xfs_buf_t       *agibp;
+       struct xfs_buf  *agfbp;
+       struct xfs_buf  *agibp;
        xfs_agnumber_t  agno;
        uint64_t        freeblks;
        uint64_t        itotal;
index b2a9abe..c134eb4 100644 (file)
@@ -737,15 +737,15 @@ xfs_qm_destroy_quotainfo(
  */
 STATIC int
 xfs_qm_qino_alloc(
-       xfs_mount_t     *mp,
-       xfs_inode_t     **ip,
-       uint            flags)
+       struct xfs_mount        *mp,
+       struct xfs_inode        **ipp,
+       unsigned int            flags)
 {
-       xfs_trans_t     *tp;
-       int             error;
-       bool            need_alloc = true;
+       struct xfs_trans        *tp;
+       int                     error;
+       bool                    need_alloc = true;
 
-       *ip = NULL;
+       *ipp = NULL;
        /*
         * With superblock that doesn't have separate pquotino, we
         * share an inode between gquota and pquota. If the on-disk
@@ -771,7 +771,7 @@ xfs_qm_qino_alloc(
                                return -EFSCORRUPTED;
                }
                if (ino != NULLFSINO) {
-                       error = xfs_iget(mp, NULL, ino, 0, 0, ip);
+                       error = xfs_iget(mp, NULL, ino, 0, 0, ipp);
                        if (error)
                                return error;
                        mp->m_sb.sb_gquotino = NULLFSINO;
@@ -787,7 +787,7 @@ xfs_qm_qino_alloc(
                return error;
 
        if (need_alloc) {
-               error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip);
+               error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ipp);
                if (error) {
                        xfs_trans_cancel(tp);
                        return error;
@@ -812,11 +812,11 @@ xfs_qm_qino_alloc(
                mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
        }
        if (flags & XFS_QMOPT_UQUOTA)
-               mp->m_sb.sb_uquotino = (*ip)->i_ino;
+               mp->m_sb.sb_uquotino = (*ipp)->i_ino;
        else if (flags & XFS_QMOPT_GQUOTA)
-               mp->m_sb.sb_gquotino = (*ip)->i_ino;
+               mp->m_sb.sb_gquotino = (*ipp)->i_ino;
        else
-               mp->m_sb.sb_pquotino = (*ip)->i_ino;
+               mp->m_sb.sb_pquotino = (*ipp)->i_ino;
        spin_unlock(&mp->m_sb_lock);
        xfs_log_sb(tp);
 
@@ -826,7 +826,7 @@ xfs_qm_qino_alloc(
                xfs_alert(mp, "%s failed (error %d)!", __func__, error);
        }
        if (need_alloc)
-               xfs_finish_inode_setup(*ip);
+               xfs_finish_inode_setup(*ipp);
        return error;
 }
 
index 7529eb6..07ebccb 100644 (file)
@@ -417,6 +417,31 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
        .cancel_item    = xfs_refcount_update_cancel_item,
 };
 
+/* Is this recovered CUI ok? */
+static inline bool
+xfs_cui_validate_phys(
+       struct xfs_mount                *mp,
+       struct xfs_phys_extent          *refc)
+{
+       if (!xfs_sb_version_hasreflink(&mp->m_sb))
+               return false;
+
+       if (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)
+               return false;
+
+       switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
+       case XFS_REFCOUNT_INCREASE:
+       case XFS_REFCOUNT_DECREASE:
+       case XFS_REFCOUNT_ALLOC_COW:
+       case XFS_REFCOUNT_FREE_COW:
+               break;
+       default:
+               return false;
+       }
+
+       return xfs_verify_fsbext(mp, refc->pe_startblock, refc->pe_len);
+}
+
 /*
  * Process a refcount update intent item that was recovered from the log.
  * We need to update the refcountbt.
@@ -433,11 +458,9 @@ xfs_cui_item_recover(
        struct xfs_trans                *tp;
        struct xfs_btree_cur            *rcur = NULL;
        struct xfs_mount                *mp = lip->li_mountp;
-       xfs_fsblock_t                   startblock_fsb;
        xfs_fsblock_t                   new_fsb;
        xfs_extlen_t                    new_len;
        unsigned int                    refc_type;
-       bool                            op_ok;
        bool                            requeue_only = false;
        enum xfs_refcount_intent_type   type;
        int                             i;
@@ -449,26 +472,13 @@ xfs_cui_item_recover(
         * just toss the CUI.
         */
        for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
-               refc = &cuip->cui_format.cui_extents[i];
-               startblock_fsb = XFS_BB_TO_FSB(mp,
-                                  XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
-               switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
-               case XFS_REFCOUNT_INCREASE:
-               case XFS_REFCOUNT_DECREASE:
-               case XFS_REFCOUNT_ALLOC_COW:
-               case XFS_REFCOUNT_FREE_COW:
-                       op_ok = true;
-                       break;
-               default:
-                       op_ok = false;
-                       break;
-               }
-               if (!op_ok || startblock_fsb == 0 ||
-                   refc->pe_len == 0 ||
-                   startblock_fsb >= mp->m_sb.sb_dblocks ||
-                   refc->pe_len >= mp->m_sb.sb_agblocks ||
-                   (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS))
+               if (!xfs_cui_validate_phys(mp,
+                                       &cuip->cui_format.cui_extents[i])) {
+                       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+                                       &cuip->cui_format,
+                                       sizeof(cuip->cui_format));
                        return -EFSCORRUPTED;
+               }
        }
 
        /*
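
This refactor hoists the open-coded plausibility checks out of
xfs_cui_item_recover() into a single predicate, xfs_cui_validate_phys(), tightens
the range test with xfs_verify_fsbext(), and turns a silent -EFSCORRUPTED return
into a logged XFS_CORRUPTION_ERROR. The shape of the change in a self-contained
sketch (illustrative names and limits, not the XFS types):

#include <stdbool.h>
#include <stdio.h>

struct extent { unsigned start; unsigned len; unsigned flags; };

#define EXT_FLAG_MASK   0x3u
#define FS_BLOCKS       1024u

/* One predicate that answers "is this recovered record plausible?". */
static bool extent_validate(const struct extent *ext)
{
        if (ext->flags & ~EXT_FLAG_MASK)
                return false;
        if (ext->len == 0)
                return false;
        /* range check written to avoid unsigned overflow */
        return ext->start < FS_BLOCKS && ext->len <= FS_BLOCKS - ext->start;
}

static int recover_items(const struct extent *items, int n)
{
        for (int i = 0; i < n; i++) {
                if (!extent_validate(&items[i])) {
                        fprintf(stderr, "corrupt record %d\n", i);
                        return -1;      /* akin to returning -EFSCORRUPTED */
                }
        }
        return 0;
}

int main(void)
{
        struct extent items[] = {
                { .start = 10, .len = 4, .flags = 1 },
                { .start = 10, .len = 0, .flags = 1 },  /* invalid: zero length */
        };

        return recover_items(items, 2) ? 1 : 0;
}
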
index 7adc996..49cebd6 100644
@@ -460,6 +460,42 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
        .cancel_item    = xfs_rmap_update_cancel_item,
 };
 
+/* Is this recovered RUI ok? */
+static inline bool
+xfs_rui_validate_map(
+       struct xfs_mount                *mp,
+       struct xfs_map_extent           *rmap)
+{
+       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return false;
+
+       if (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)
+               return false;
+
+       switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
+       case XFS_RMAP_EXTENT_MAP:
+       case XFS_RMAP_EXTENT_MAP_SHARED:
+       case XFS_RMAP_EXTENT_UNMAP:
+       case XFS_RMAP_EXTENT_UNMAP_SHARED:
+       case XFS_RMAP_EXTENT_CONVERT:
+       case XFS_RMAP_EXTENT_CONVERT_SHARED:
+       case XFS_RMAP_EXTENT_ALLOC:
+       case XFS_RMAP_EXTENT_FREE:
+               break;
+       default:
+               return false;
+       }
+
+       if (!XFS_RMAP_NON_INODE_OWNER(rmap->me_owner) &&
+           !xfs_verify_ino(mp, rmap->me_owner))
+               return false;
+
+       if (!xfs_verify_fileext(mp, rmap->me_startoff, rmap->me_len))
+               return false;
+
+       return xfs_verify_fsbext(mp, rmap->me_startblock, rmap->me_len);
+}
+
 /*
  * Process an rmap update intent item that was recovered from the log.
  * We need to update the rmapbt.
@@ -475,10 +511,8 @@ xfs_rui_item_recover(
        struct xfs_trans                *tp;
        struct xfs_btree_cur            *rcur = NULL;
        struct xfs_mount                *mp = lip->li_mountp;
-       xfs_fsblock_t                   startblock_fsb;
        enum xfs_rmap_intent_type       type;
        xfs_exntst_t                    state;
-       bool                            op_ok;
        int                             i;
        int                             whichfork;
        int                             error = 0;
@@ -489,30 +523,13 @@ xfs_rui_item_recover(
         * just toss the RUI.
         */
        for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
-               rmap = &ruip->rui_format.rui_extents[i];
-               startblock_fsb = XFS_BB_TO_FSB(mp,
-                                  XFS_FSB_TO_DADDR(mp, rmap->me_startblock));
-               switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
-               case XFS_RMAP_EXTENT_MAP:
-               case XFS_RMAP_EXTENT_MAP_SHARED:
-               case XFS_RMAP_EXTENT_UNMAP:
-               case XFS_RMAP_EXTENT_UNMAP_SHARED:
-               case XFS_RMAP_EXTENT_CONVERT:
-               case XFS_RMAP_EXTENT_CONVERT_SHARED:
-               case XFS_RMAP_EXTENT_ALLOC:
-               case XFS_RMAP_EXTENT_FREE:
-                       op_ok = true;
-                       break;
-               default:
-                       op_ok = false;
-                       break;
-               }
-               if (!op_ok || startblock_fsb == 0 ||
-                   rmap->me_len == 0 ||
-                   startblock_fsb >= mp->m_sb.sb_dblocks ||
-                   rmap->me_len >= mp->m_sb.sb_agblocks ||
-                   (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS))
+               if (!xfs_rui_validate_map(mp,
+                                       &ruip->rui_format.rui_extents[i])) {
+                       XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+                                       &ruip->rui_format,
+                                       sizeof(ruip->rui_format));
                        return -EFSCORRUPTED;
+               }
        }
 
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
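
The rmap hunk above is the same refactor applied to RUI recovery:
xfs_rui_validate_map() mirrors the predicate sketched after the refcount hunk,
adding checks on the record's owner (xfs_verify_ino()) and on the mapped file
range (xfs_verify_fileext()) before the final xfs_verify_fsbext() test.
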
index ede1baf..b4999fb 100644
@@ -32,7 +32,7 @@ xfs_rtget_summary(
        xfs_trans_t     *tp,            /* transaction pointer */
        int             log,            /* log2 of extent size */
        xfs_rtblock_t   bbno,           /* bitmap block number */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       struct xfs_buf  **rbpp,         /* in/out: summary block buffer */
        xfs_fsblock_t   *rsb,           /* in/out: summary block number */
        xfs_suminfo_t   *sum)           /* out: summary info for this block */
 {
@@ -50,7 +50,7 @@ xfs_rtany_summary(
        int             low,            /* low log2 extent size */
        int             high,           /* high log2 extent size */
        xfs_rtblock_t   bbno,           /* bitmap block number */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       struct xfs_buf  **rbpp,         /* in/out: summary block buffer */
        xfs_fsblock_t   *rsb,           /* in/out: summary block number */
        int             *stat)          /* out: any good extents here? */
 {
@@ -104,7 +104,7 @@ xfs_rtcopy_summary(
        xfs_trans_t     *tp)            /* transaction pointer */
 {
        xfs_rtblock_t   bbno;           /* bitmap block number */
-       xfs_buf_t       *bp;            /* summary buffer */
+       struct xfs_buf  *bp;            /* summary buffer */
        int             error;          /* error return value */
        int             log;            /* summary level number (log length) */
        xfs_suminfo_t   sum;            /* summary data */
@@ -144,7 +144,7 @@ xfs_rtallocate_range(
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_rtblock_t   start,          /* start block to allocate */
        xfs_extlen_t    len,            /* length to allocate */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       struct xfs_buf  **rbpp,         /* in/out: summary block buffer */
        xfs_fsblock_t   *rsb)           /* in/out: summary block number */
 {
        xfs_rtblock_t   end;            /* end of the allocated extent */
@@ -226,7 +226,7 @@ xfs_rtallocate_extent_block(
        xfs_extlen_t    maxlen,         /* maximum length to allocate */
        xfs_extlen_t    *len,           /* out: actual length allocated */
        xfs_rtblock_t   *nextp,         /* out: next block to try */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       struct xfs_buf  **rbpp,         /* in/out: summary block buffer */
        xfs_fsblock_t   *rsb,           /* in/out: summary block number */
        xfs_extlen_t    prod,           /* extent product factor */
        xfs_rtblock_t   *rtblock)       /* out: start block allocated */
@@ -345,7 +345,7 @@ xfs_rtallocate_extent_exact(
        xfs_extlen_t    minlen,         /* minimum length to allocate */
        xfs_extlen_t    maxlen,         /* maximum length to allocate */
        xfs_extlen_t    *len,           /* out: actual length allocated */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       struct xfs_buf  **rbpp,         /* in/out: summary block buffer */
        xfs_fsblock_t   *rsb,           /* in/out: summary block number */
        xfs_extlen_t    prod,           /* extent product factor */
        xfs_rtblock_t   *rtblock)       /* out: start block allocated */
@@ -424,7 +424,7 @@ xfs_rtallocate_extent_near(
        xfs_extlen_t    minlen,         /* minimum length to allocate */
        xfs_extlen_t    maxlen,         /* maximum length to allocate */
        xfs_extlen_t    *len,           /* out: actual length allocated */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       struct xfs_buf  **rbpp,         /* in/out: summary block buffer */
        xfs_fsblock_t   *rsb,           /* in/out: summary block number */
        xfs_extlen_t    prod,           /* extent product factor */
        xfs_rtblock_t   *rtblock)       /* out: start block allocated */
@@ -626,7 +626,7 @@ xfs_rtallocate_extent_size(
        xfs_extlen_t    minlen,         /* minimum length to allocate */
        xfs_extlen_t    maxlen,         /* maximum length to allocate */
        xfs_extlen_t    *len,           /* out: actual length allocated */
-       xfs_buf_t       **rbpp,         /* in/out: summary block buffer */
+       struct xfs_buf  **rbpp,         /* in/out: summary block buffer */
        xfs_fsblock_t   *rsb,           /* in/out: summary block number */
        xfs_extlen_t    prod,           /* extent product factor */
        xfs_rtblock_t   *rtblock)       /* out: start block allocated */
@@ -900,7 +900,7 @@ xfs_growfs_rt(
        xfs_growfs_rt_t *in)            /* growfs rt input struct */
 {
        xfs_rtblock_t   bmbno;          /* bitmap block number */
-       xfs_buf_t       *bp;            /* temporary buffer */
+       struct xfs_buf  *bp;            /* temporary buffer */
        int             error;          /* error return value */
        xfs_mount_t     *nmp;           /* new (fake) mount structure */
        xfs_rfsblock_t  nrblocks;       /* new number of realtime blocks */
@@ -1151,7 +1151,7 @@ xfs_rtallocate_extent(
        int             error;          /* error value */
        xfs_rtblock_t   r;              /* result allocated block */
        xfs_fsblock_t   sb;             /* summary file block number */
-       xfs_buf_t       *sumbp;         /* summary file block buffer */
+       struct xfs_buf  *sumbp;         /* summary file block buffer */
 
        ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
        ASSERT(minlen > 0 && minlen <= maxlen);
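
Every xfs_rtalloc.c change above is one mechanical substitution: the deprecated
xfs_buf_t typedef becomes an explicit struct xfs_buf, in line with the kernel
coding-style rule that structure types should not hide behind typedefs. In
miniature, with stub types that are purely illustrative:

#include <stdio.h>

/* Old style: the typedef hides that callers are handling a struct. */
typedef struct buf_stub { int b_flags; } buf_stub_t;

/* Preferred: spell out the struct tag at every use site, including
 * double-pointer out parameters such as **rbpp in the hunks above. */
static void hold_buf(struct buf_stub **bpp, struct buf_stub *bp)
{
        *bpp = bp;
}

int main(void)
{
        struct buf_stub b = { .b_flags = 1 }, *held;

        hold_buf(&held, &b);
        printf("%d\n", held->b_flags);
        return 0;
}
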
index 93e77b2..ed88562 100644
@@ -115,10 +115,10 @@ int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
                       xfs_rtblock_t start, xfs_extlen_t len, int val);
 int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
                             int log, xfs_rtblock_t bbno, int delta,
-                            xfs_buf_t **rbpp, xfs_fsblock_t *rsb,
+                            struct xfs_buf **rbpp, xfs_fsblock_t *rsb,
                             xfs_suminfo_t *sum);
 int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
-                        xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp,
+                        xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp,
                         xfs_fsblock_t *rsb);
 int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
                     xfs_rtblock_t start, xfs_extlen_t len,
index e3e229e..813be87 100644
@@ -199,10 +199,12 @@ xfs_fs_show_options(
                seq_printf(m, ",swidth=%d",
                                (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 
-       if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
-               seq_puts(m, ",usrquota");
-       else if (mp->m_qflags & XFS_UQUOTA_ACCT)
-               seq_puts(m, ",uqnoenforce");
+       if (mp->m_qflags & XFS_UQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_UQUOTA_ENFD)
+                       seq_puts(m, ",usrquota");
+               else
+                       seq_puts(m, ",uqnoenforce");
+       }
 
        if (mp->m_qflags & XFS_PQUOTA_ACCT) {
                if (mp->m_qflags & XFS_PQUOTA_ENFD)
@@ -1159,7 +1161,7 @@ suffix_kstrtoint(
  * NOTE: mp->m_super is NULL here!
  */
 static int
-xfs_fc_parse_param(
+xfs_fs_parse_param(
        struct fs_context       *fc,
        struct fs_parameter     *param)
 {
@@ -1317,7 +1319,7 @@ xfs_fc_parse_param(
 }
 
 static int
-xfs_fc_validate_params(
+xfs_fs_validate_params(
        struct xfs_mount        *mp)
 {
        /*
@@ -1386,7 +1388,7 @@ xfs_fc_validate_params(
 }
 
 static int
-xfs_fc_fill_super(
+xfs_fs_fill_super(
        struct super_block      *sb,
        struct fs_context       *fc)
 {
@@ -1396,7 +1398,7 @@ xfs_fc_fill_super(
 
        mp->m_super = sb;
 
-       error = xfs_fc_validate_params(mp);
+       error = xfs_fs_validate_params(mp);
        if (error)
                goto out_free_names;
 
@@ -1467,6 +1469,45 @@ xfs_fc_fill_super(
 #endif
        }
 
+       /* Filesystem claims it needs repair, so refuse the mount. */
+       if (xfs_sb_version_needsrepair(&mp->m_sb)) {
+               xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
+               error = -EFSCORRUPTED;
+               goto out_free_sb;
+       }
+
+       /*
+        * Don't touch the filesystem if a user tool thinks it owns the primary
+        * superblock.  mkfs doesn't clear the flag from secondary supers, so
+        * we don't check them at all.
+        */
+       if (mp->m_sb.sb_inprogress) {
+               xfs_warn(mp, "Offline file system operation in progress!");
+               error = -EFSCORRUPTED;
+               goto out_free_sb;
+       }
+
+       /*
+        * Until this is fixed only page-sized or smaller data blocks work.
+        */
+       if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
+               xfs_warn(mp,
+               "File system with blocksize %d bytes. "
+               "Only pagesize (%ld) or less will currently work.",
+                               mp->m_sb.sb_blocksize, PAGE_SIZE);
+               error = -ENOSYS;
+               goto out_free_sb;
+       }
+
+       /* Ensure this filesystem fits in the page cache limits */
+       if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
+           xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
+               xfs_warn(mp,
+               "file system too large to be mounted on this system.");
+               error = -EFBIG;
+               goto out_free_sb;
+       }
+
        /*
         * XFS block mappings use 54 bits to store the logical block offset.
         * This should suffice to handle the maximum file size that the VFS
@@ -1478,7 +1519,7 @@ xfs_fc_fill_super(
         * Avoid integer overflow by comparing the maximum bmbt offset to the
         * maximum pagecache offset in units of fs blocks.
         */
-       if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) {
+       if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
                xfs_warn(mp,
 "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
                         XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
@@ -1621,10 +1662,10 @@ xfs_fc_fill_super(
 }
 
 static int
-xfs_fc_get_tree(
+xfs_fs_get_tree(
        struct fs_context       *fc)
 {
-       return get_tree_bdev(fc, xfs_fc_fill_super);
+       return get_tree_bdev(fc, xfs_fs_fill_super);
 }
 
 static int
@@ -1743,7 +1784,7 @@ xfs_remount_ro(
  * silently ignore all options that we can't actually change.
  */
 static int
-xfs_fc_reconfigure(
+xfs_fs_reconfigure(
        struct fs_context *fc)
 {
        struct xfs_mount        *mp = XFS_M(fc->root->d_sb);
@@ -1756,7 +1797,7 @@ xfs_fc_reconfigure(
        if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
                fc->sb_flags |= SB_I_VERSION;
 
-       error = xfs_fc_validate_params(new_mp);
+       error = xfs_fs_validate_params(new_mp);
        if (error)
                return error;
 
@@ -1793,7 +1834,7 @@ xfs_fc_reconfigure(
        return 0;
 }
 
-static void xfs_fc_free(
+static void xfs_fs_free(
        struct fs_context       *fc)
 {
        struct xfs_mount        *mp = fc->s_fs_info;
@@ -1809,10 +1850,10 @@ static void xfs_fc_free(
 }
 
 static const struct fs_context_operations xfs_context_ops = {
-       .parse_param = xfs_fc_parse_param,
-       .get_tree    = xfs_fc_get_tree,
-       .reconfigure = xfs_fc_reconfigure,
-       .free        = xfs_fc_free,
+       .parse_param = xfs_fs_parse_param,
+       .get_tree    = xfs_fs_get_tree,
+       .reconfigure = xfs_fs_reconfigure,
+       .free        = xfs_fs_free,
 };
 
 static int xfs_init_fs_context(
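
Three things happen in the xfs_super.c hunks: the fs_context callbacks are
renamed from xfs_fc_* to xfs_fs_* to match the file's other operations; the
usrquota/uqnoenforce option printing is restructured into the same nested-if
shape already used for the project and group quota flags; and a batch of
superblock sanity checks (the needsrepair feature flag, a set sb_inprogress,
a block size larger than the page size, and the page-cache size limits) now
runs early in xfs_fs_fill_super(), with the MAX_LFS_FILESIZE comparison
rewritten in terms of xfs_verify_fileoff(). A user-space sketch of the
fail-fast validation shape, all names illustrative:

#include <stdio.h>

struct sb_stub {
        int needs_repair;
        int in_progress;
        unsigned block_size;
};

#define PAGE_SZ 4096u

/* Validate everything up front and fail the "mount" with a clear
 * message before any further setup runs. */
static int fill_super(const struct sb_stub *sb)
{
        if (sb->needs_repair) {
                fprintf(stderr, "filesystem needs repair\n");
                return -1;
        }
        if (sb->in_progress) {
                fprintf(stderr, "offline operation in progress\n");
                return -1;
        }
        if (sb->block_size > PAGE_SZ) {
                fprintf(stderr, "block size %u unsupported\n", sb->block_size);
                return -1;
        }
        return 0;
}

int main(void)
{
        struct sb_stub sb = { 0, 0, 4096 };

        return fill_super(&sb) ? 1 : 0;
}
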
index 8e88a7c..1f43fd7 100644
@@ -154,7 +154,7 @@ xfs_symlink(
        const char              *cur_chunk;
        int                     byte_cnt;
        int                     n;
-       xfs_buf_t               *bp;
+       struct xfs_buf          *bp;
        prid_t                  prid;
        struct xfs_dquot        *udqp = NULL;
        struct xfs_dquot        *gdqp = NULL;
@@ -365,7 +365,7 @@ STATIC int
 xfs_inactive_symlink_rmt(
        struct xfs_inode *ip)
 {
-       xfs_buf_t       *bp;
+       struct xfs_buf  *bp;
        int             done;
        int             error;
        int             i;
index 8695165..5a263ae 100644
@@ -103,6 +103,24 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
 DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list);
 DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list);
 
+TRACE_EVENT(xlog_intent_recovery_failed,
+       TP_PROTO(struct xfs_mount *mp, int error, void *function),
+       TP_ARGS(mp, error, function),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(int, error)
+               __field(void *, function)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->error = error;
+               __entry->function = function;
+       ),
+       TP_printk("dev %d:%d error %d function %pS",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->error, __entry->function)
+);
+
 DECLARE_EVENT_CLASS(xfs_perag_class,
        TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
                 unsigned long caller_ip),
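
The new xlog_intent_recovery_failed tracepoint gives intent-recovery failures a
symbolic fingerprint: %pS resolves the stored function pointer to a kernel
symbol, so the trace line names the recovery handler that failed along with the
errno. TRACE_EVENT() generates the corresponding
trace_xlog_intent_recovery_failed() call for the recovery path to invoke; the
call site itself is not part of this section.
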
index c94e71f..e72730f 100644
@@ -465,7 +465,7 @@ xfs_trans_apply_sb_deltas(
        xfs_trans_t     *tp)
 {
        xfs_dsb_t       *sbp;
-       xfs_buf_t       *bp;
+       struct xfs_buf  *bp;
        int             whole = 0;
 
        bp = xfs_trans_getsb(tp);
index 42d63b8..9aced0a 100644
@@ -121,7 +121,7 @@ xfs_trans_get_buf_map(
        xfs_buf_flags_t         flags,
        struct xfs_buf          **bpp)
 {
-       xfs_buf_t               *bp;
+       struct xfs_buf          *bp;
        struct xfs_buf_log_item *bip;
        int                     error;
 
@@ -401,7 +401,7 @@ xfs_trans_brelse(
 void
 xfs_trans_bhold(
        xfs_trans_t             *tp,
-       xfs_buf_t               *bp)
+       struct xfs_buf          *bp)
 {
        struct xfs_buf_log_item *bip = bp->b_log_item;
 
@@ -422,7 +422,7 @@ xfs_trans_bhold(
 void
 xfs_trans_bhold_release(
        xfs_trans_t             *tp,
-       xfs_buf_t               *bp)
+       struct xfs_buf          *bp)
 {
        struct xfs_buf_log_item *bip = bp->b_log_item;
 
@@ -538,7 +538,7 @@ xfs_trans_log_buf(
 void
 xfs_trans_binval(
        xfs_trans_t             *tp,
-       xfs_buf_t               *bp)
+       struct xfs_buf          *bp)
 {
        struct xfs_buf_log_item *bip = bp->b_log_item;
        int                     i;
@@ -593,7 +593,7 @@ xfs_trans_binval(
 void
 xfs_trans_inode_buf(
        xfs_trans_t             *tp,
-       xfs_buf_t               *bp)
+       struct xfs_buf          *bp)
 {
        struct xfs_buf_log_item *bip = bp->b_log_item;
 
@@ -618,7 +618,7 @@ xfs_trans_inode_buf(
 void
 xfs_trans_stale_inode_buf(
        xfs_trans_t             *tp,
-       xfs_buf_t               *bp)
+       struct xfs_buf          *bp)
 {
        struct xfs_buf_log_item *bip = bp->b_log_item;
 
@@ -643,7 +643,7 @@ xfs_trans_stale_inode_buf(
 void
 xfs_trans_inode_alloc_buf(
        xfs_trans_t             *tp,
-       xfs_buf_t               *bp)
+       struct xfs_buf          *bp)
 {
        struct xfs_buf_log_item *bip = bp->b_log_item;
 
@@ -737,7 +737,7 @@ xfs_trans_buf_copy_type(
 void
 xfs_trans_dquot_buf(
        xfs_trans_t             *tp,
-       xfs_buf_t               *bp,
+       struct xfs_buf          *bp,
        uint                    type)
 {
        struct xfs_buf_log_item *bip = bp->b_log_item;
index fe45b0c..28b8ac7 100644
@@ -84,13 +84,6 @@ xfs_trans_dup_dqinfo(
 
        xfs_trans_alloc_dqinfo(ntp);
 
-       /*
-        * Because the quota blk reservation is carried forward,
-        * it is also necessary to carry forward the DQ_DIRTY flag.
-        */
-       if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
-               ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
-
        for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
                oqa = otp->t_dqinfo->dqs[j];
                nqa = ntp->t_dqinfo->dqs[j];
@@ -143,9 +136,6 @@ xfs_trans_mod_dquot_byino(
            xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
                return;
 
-       if (tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-
        if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
                (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
        if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot)
@@ -204,6 +194,9 @@ xfs_trans_mod_dquot(
        ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
        qtrx = NULL;
 
+       if (!delta)
+               return;
+
        if (tp->t_dqinfo == NULL)
                xfs_trans_alloc_dqinfo(tp);
        /*
@@ -215,10 +208,8 @@ xfs_trans_mod_dquot(
        if (qtrx->qt_dquot == NULL)
                qtrx->qt_dquot = dqp;
 
-       if (delta) {
-               trace_xfs_trans_mod_dquot_before(qtrx);
-               trace_xfs_trans_mod_dquot(tp, dqp, field, delta);
-       }
+       trace_xfs_trans_mod_dquot_before(qtrx);
+       trace_xfs_trans_mod_dquot(tp, dqp, field, delta);
 
        switch (field) {
        /* regular disk blk reservation */
@@ -271,10 +262,7 @@ xfs_trans_mod_dquot(
                ASSERT(0);
        }
 
-       if (delta)
-               trace_xfs_trans_mod_dquot_after(qtrx);
-
-       tp->t_flags |= XFS_TRANS_DQ_DIRTY;
+       trace_xfs_trans_mod_dquot_after(qtrx);
 }
 
 
@@ -351,7 +339,7 @@ xfs_trans_apply_dquot_deltas(
        int64_t                 totalbdelta;
        int64_t                 totalrtbdelta;
 
-       if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+       if (!tp->t_dqinfo)
                return;
 
        ASSERT(tp->t_dqinfo);
@@ -493,7 +481,7 @@ xfs_trans_unreserve_and_mod_dquots(
        struct xfs_dqtrx        *qtrx, *qa;
        bool                    locked;
 
-       if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+       if (!tp->t_dqinfo)
                return;
 
        for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
@@ -698,16 +686,10 @@ xfs_trans_dqresv(
         * because we don't have the luxury of a transaction envelope then.
         */
        if (tp) {
-               ASSERT(tp->t_dqinfo);
                ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-               if (nblks != 0)
-                       xfs_trans_mod_dquot(tp, dqp,
-                                           flags & XFS_QMOPT_RESBLK_MASK,
-                                           nblks);
-               if (ninos != 0)
-                       xfs_trans_mod_dquot(tp, dqp,
-                                           XFS_TRANS_DQ_RES_INOS,
-                                           ninos);
+               xfs_trans_mod_dquot(tp, dqp, flags & XFS_QMOPT_RESBLK_MASK,
+                                   nblks);
+               xfs_trans_mod_dquot(tp, dqp, XFS_TRANS_DQ_RES_INOS, ninos);
        }
        ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count);
        ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count);
@@ -752,9 +734,6 @@ xfs_trans_reserve_quota_bydquots(
        if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
                return 0;
 
-       if (tp && tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-
        ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
 
        if (udqp) {
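
The xfs_trans_dquot.c hunks retire the XFS_TRANS_DQ_DIRTY transaction flag.
xfs_trans_mod_dquot() now returns early for a zero delta and allocates t_dqinfo
lazily on the first real modification, so "tp->t_dqinfo != NULL" carries exactly
the information the flag used to, and the callers that pre-allocated the info
structure or propagated the flag can be deleted. The idea in a self-contained
sketch, with illustrative names:

#include <stdio.h>
#include <stdlib.h>

struct dqinfo { long delta; };

struct trans {
        struct dqinfo *dqinfo;  /* NULL until the first real modification */
};

/* Allocating the tracking structure only on the first nonzero delta
 * lets "tp->dqinfo != NULL" stand in for the removed DIRTY flag. */
static void mod_dquot(struct trans *tp, long delta)
{
        if (!delta)
                return;
        if (!tp->dqinfo) {
                tp->dqinfo = calloc(1, sizeof(*tp->dqinfo));
                if (!tp->dqinfo)
                        return;         /* sketch: ignore allocation failure */
        }
        tp->dqinfo->delta += delta;
}

static void apply_deltas(struct trans *tp)
{
        if (!tp->dqinfo)        /* never modified: nothing to apply */
                return;
        printf("applying delta %ld\n", tp->dqinfo->delta);
}

int main(void)
{
        struct trans tp = { NULL };

        mod_dquot(&tp, 0);      /* no-op, transaction stays clean */
        mod_dquot(&tp, 8);
        apply_deltas(&tp);
        free(tp.dqinfo);
        return 0;
}
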
index a6a9373..232838d 100644
@@ -124,11 +124,10 @@ struct cppc_perf_fb_ctrs {
 
 /* Per CPU container for runtime CPPC management. */
 struct cppc_cpudata {
-       int cpu;
+       struct list_head node;
        struct cppc_perf_caps perf_caps;
        struct cppc_perf_ctrls perf_ctrls;
        struct cppc_perf_fb_ctrs perf_fb_ctrs;
-       struct cpufreq_policy *cur_policy;
        unsigned int shared_type;
        cpumask_var_t shared_cpu_map;
 };
@@ -137,7 +136,8 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
 extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
 extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
 extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
-extern int acpi_get_psd_map(struct cppc_cpudata **);
+extern bool acpi_cpc_valid(void);
+extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data);
 extern unsigned int cppc_get_transition_latency(int cpu);
 extern bool cpc_ffh_supported(void);
 extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val);
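
In cppc_acpi.h, struct cppc_cpudata drops its cpufreq-specific members (cpu,
cur_policy) and gains a list node, and acpi_get_psd_map() changes from filling a
global array of pointers to populating a caller-supplied structure for a single
CPU. Together with the new acpi_cpc_valid() predicate, this suggests the
cppc-cpufreq driver now owns the per-CPU allocation and checks CPC validity
before building the PSD dependency map; the driver-side changes are not shown
in this section.
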
index 9ea83d8..c6af40c 100644
@@ -1137,6 +1137,10 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer,
 }
 #endif
 
+#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED
+extern int devmem_is_allowed(unsigned long pfn);
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASM_GENERIC_IO_H */
index 306aa3a..3b273f9 100644
@@ -100,10 +100,10 @@ struct drm_fb_helper_funcs {
  * @funcs: driver callbacks for fb helper
  * @fbdev: emulated fbdev device info struct
  * @pseudo_palette: fake palette of 16 colors
- * @dirty_clip: clip rectangle used with deferred_io to accumulate damage to
- *              the screen buffer
- * @dirty_lock: spinlock protecting @dirty_clip
- * @dirty_work: worker used to flush the framebuffer
+ * @damage_clip: clip rectangle used with deferred_io to accumulate damage to
+ *                the screen buffer
+ * @damage_lock: spinlock protecting @damage_clip
+ * @damage_work: worker used to flush the framebuffer
  * @resume_work: worker used during resume if the console lock is already taken
  *
  * This is the main structure used by the fbdev helpers. Drivers supporting
@@ -131,9 +131,9 @@ struct drm_fb_helper {
        const struct drm_fb_helper_funcs *funcs;
        struct fb_info *fbdev;
        u32 pseudo_palette[17];
-       struct drm_clip_rect dirty_clip;
-       spinlock_t dirty_lock;
-       struct work_struct dirty_work;
+       struct drm_clip_rect damage_clip;
+       spinlock_t damage_lock;
+       struct work_struct damage_work;
        struct work_struct resume_work;
 
        /**
index 3449a03..434328d 100644
@@ -98,9 +98,9 @@ struct drm_gem_shmem_object {
        unsigned int vmap_use_count;
 
        /**
-        * @map_cached: map object cached (instead of using writecombine).
+        * @map_wc: map object write-combined (instead of using shmem defaults).
         */
-       bool map_cached;
+       bool map_wc;
 };
 
 #define to_drm_gem_shmem_obj(obj) \
@@ -133,9 +133,6 @@ drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
                                 struct drm_device *dev, size_t size,
                                 uint32_t *handle);
 
-struct drm_gem_object *
-drm_gem_shmem_create_object_cached(struct drm_device *dev, size_t size);
-
 int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev,
                              struct drm_mode_create_dumb *args);
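
Note the inverted sense in drm_gem_shmem_object: the old map_cached flag
(write-combined by default, opt into cached) is replaced by map_wc (cached by
default, opt into write-combined), and the drm_gem_shmem_create_object_cached()
helper is dropped. A driver that wants WC mappings would presumably set
shmem->map_wc = true in its object-creation hook; drivers that relied on the
removed helper now get the cached default without any extra code.
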
 
index cdf2a29..a0d79d1 100644
@@ -195,6 +195,9 @@ enum drm_mode_status {
  * @crtc_vsync_end: hardware mode vertical sync end
  * @crtc_vtotal: hardware mode vertical total size
  *
+ * This is the kernel API display mode information structure. For the
+ * user-space version see struct drm_mode_modeinfo.
+ *
  * The horizontal and vertical timings are defined per the following diagram.
  *
  * ::
index f2de050..16ff3fa 100644
@@ -1044,9 +1044,8 @@ struct drm_connector_helper_funcs {
         * NOTE:
         *
         * This function is called in the check phase of an atomic update. The
-        * driver is not allowed to change anything outside of the free-standing
-        * state objects passed-in or assembled in the overall &drm_atomic_state
-        * update tracking structure.
+        * driver is not allowed to change anything outside of the
+        * &drm_atomic_state update tracking structure passed in.
         *
         * RETURNS:
         *
@@ -1056,7 +1055,7 @@ struct drm_connector_helper_funcs {
         * for this.
         */
        struct drm_encoder *(*atomic_best_encoder)(struct drm_connector *connector,
-                                                  struct drm_connector_state *connector_state);
+                                                  struct drm_atomic_state *state);
 
        /**
         * @atomic_check:
@@ -1097,15 +1096,15 @@ struct drm_connector_helper_funcs {
         *
         * This hook is to be used by drivers implementing writeback connectors
         * that need a point when to commit the writeback job to the hardware.
-        * The writeback_job to commit is available in
-        * &drm_connector_state.writeback_job.
+        * The writeback_job to commit is available in the new connector state,
+        * in &drm_connector_state.writeback_job.
         *
         * This hook is optional.
         *
         * This callback is used by the atomic modeset helpers.
         */
        void (*atomic_commit)(struct drm_connector *connector,
-                             struct drm_connector_state *state);
+                             struct drm_atomic_state *state);
 
        /**
         * @prepare_writeback_job:
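
The &drm_connector_helper_funcs hooks atomic_best_encoder and atomic_commit now
receive the full &drm_atomic_state instead of a single connector state, matching
the rest of the atomic helpers. A driver migrating to the new signature would
typically fetch its state from the passed-in object; a fragment, not a buildable
driver, with foo_* names as placeholders:

static struct drm_encoder *
foo_atomic_best_encoder(struct drm_connector *connector,
                        struct drm_atomic_state *state)
{
        struct drm_connector_state *conn_state =
                drm_atomic_get_new_connector_state(state, connector);

        /* choose an encoder based on conn_state, as before */
        return foo_pick_encoder(connector, conn_state);    /* placeholder */
}
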
index eba1710..98e1b2a 100644
 #define PMC_PLLBCK             8
 #define PMC_AUDIOPLLCK         9
 
+/* SAMA7G5 */
+#define PMC_CPUPLL             (PMC_MAIN + 1)
+#define PMC_SYSPLL             (PMC_MAIN + 2)
+#define PMC_DDRPLL             (PMC_MAIN + 3)
+#define PMC_IMGPLL             (PMC_MAIN + 4)
+#define PMC_BAUDPLL            (PMC_MAIN + 5)
+#define PMC_AUDIOPMCPLL                (PMC_MAIN + 6)
+#define PMC_AUDIOIOPLL         (PMC_MAIN + 7)
+#define PMC_ETHPLL             (PMC_MAIN + 8)
+#define PMC_CPU                        (PMC_MAIN + 9)
+
 #ifndef AT91_PMC_MOSCS
 #define AT91_PMC_MOSCS         0               /* MOSCS Flag */
 #define AT91_PMC_LOCKA         1               /* PLLA Lock */
diff --git a/include/dt-bindings/clock/fsl,qoriq-clockgen.h b/include/dt-bindings/clock/fsl,qoriq-clockgen.h
new file mode 100644
index 0000000..ddec7d0
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef DT_CLOCK_FSL_QORIQ_CLOCKGEN_H
+#define DT_CLOCK_FSL_QORIQ_CLOCKGEN_H
+
+#define QORIQ_CLK_SYSCLK       0
+#define QORIQ_CLK_CMUX         1
+#define QORIQ_CLK_HWACCEL      2
+#define QORIQ_CLK_FMAN         3
+#define QORIQ_CLK_PLATFORM_PLL 4
+#define QORIQ_CLK_CORECLK      5
+
+#define QORIQ_CLK_PLL_DIV(x)   ((x) - 1)
+
+#endif /* DT_CLOCK_FSL_QORIQ_CLOCKGEN_H */
index 40d4994..a93b58c 100644
 #define CLKID_SPICC1_SCLK                      261
 #define CLKID_NNA_AXI_CLK                      264
 #define CLKID_NNA_CORE_CLK                     267
+#define CLKID_MIPI_DSI_PXCLK_SEL               269
+#define CLKID_MIPI_DSI_PXCLK                   270
 
 #endif /* __G12A_CLKC_H */
index 5a2fd64..a48176a 100644
@@ -3,18 +3,52 @@
  * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
  * Copyright (c) 2020 Western Digital Corporation or its affiliates.
  */
-#ifndef K210_CLK_H
-#define K210_CLK_H
+#ifndef CLOCK_K210_CLK_H
+#define CLOCK_K210_CLK_H
 
 /*
- * Arbitrary identifiers for clocks.
- * The structure is: in0 -> pll0 -> aclk -> cpu
- *
- * Since we use the hardware defaults for now, set all these to the same clock.
+ * Kendryte K210 SoC clock identifiers (arbitrary values).
  */
-#define K210_CLK_PLL0   0
-#define K210_CLK_PLL1   0
-#define K210_CLK_ACLK   0
-#define K210_CLK_CPU    0
+#define K210_CLK_ACLK  0
+#define K210_CLK_CPU   0
+#define K210_CLK_SRAM0 1
+#define K210_CLK_SRAM1 2
+#define K210_CLK_AI    3
+#define K210_CLK_DMA   4
+#define K210_CLK_FFT   5
+#define K210_CLK_ROM   6
+#define K210_CLK_DVP   7
+#define K210_CLK_APB0  8
+#define K210_CLK_APB1  9
+#define K210_CLK_APB2  10
+#define K210_CLK_I2S0  11
+#define K210_CLK_I2S1  12
+#define K210_CLK_I2S2  13
+#define K210_CLK_I2S0_M        14
+#define K210_CLK_I2S1_M        15
+#define K210_CLK_I2S2_M        16
+#define K210_CLK_WDT0  17
+#define K210_CLK_WDT1  18
+#define K210_CLK_SPI0  19
+#define K210_CLK_SPI1  20
+#define K210_CLK_SPI2  21
+#define K210_CLK_I2C0  22
+#define K210_CLK_I2C1  23
+#define K210_CLK_I2C2  24
+#define K210_CLK_SPI3  25
+#define K210_CLK_TIMER0        26
+#define K210_CLK_TIMER1        27
+#define K210_CLK_TIMER2        28
+#define K210_CLK_GPIO  29
+#define K210_CLK_UART1 30
+#define K210_CLK_UART2 31
+#define K210_CLK_UART3 32
+#define K210_CLK_FPIOA 33
+#define K210_CLK_SHA   34
+#define K210_CLK_AES   35
+#define K210_CLK_OTP   36
+#define K210_CLK_RTC   37
 
-#endif /* K210_CLK_H */
+#define K210_NUM_CLKS  38
+
+#endif /* CLOCK_K210_CLK_H */
diff --git a/include/dt-bindings/clock/qcom,camcc-sc7180.h b/include/dt-bindings/clock/qcom,camcc-sc7180.h
new file mode 100644
index 0000000..ef7d3a0
--- /dev/null
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_SC7180_H
+#define _DT_BINDINGS_CLK_QCOM_CAM_CC_SC7180_H
+
+/* CAM_CC clocks */
+#define CAM_CC_PLL2_OUT_EARLY                                  0
+#define CAM_CC_PLL0                                            1
+#define CAM_CC_PLL1                                            2
+#define CAM_CC_PLL2                                            3
+#define CAM_CC_PLL2_OUT_AUX                                    4
+#define CAM_CC_PLL3                                            5
+#define CAM_CC_CAMNOC_AXI_CLK                                  6
+#define CAM_CC_CCI_0_CLK                                       7
+#define CAM_CC_CCI_0_CLK_SRC                                   8
+#define CAM_CC_CCI_1_CLK                                       9
+#define CAM_CC_CCI_1_CLK_SRC                                   10
+#define CAM_CC_CORE_AHB_CLK                                    11
+#define CAM_CC_CPAS_AHB_CLK                                    12
+#define CAM_CC_CPHY_RX_CLK_SRC                                 13
+#define CAM_CC_CSI0PHYTIMER_CLK                                        14
+#define CAM_CC_CSI0PHYTIMER_CLK_SRC                            15
+#define CAM_CC_CSI1PHYTIMER_CLK                                        16
+#define CAM_CC_CSI1PHYTIMER_CLK_SRC                            17
+#define CAM_CC_CSI2PHYTIMER_CLK                                        18
+#define CAM_CC_CSI2PHYTIMER_CLK_SRC                            19
+#define CAM_CC_CSI3PHYTIMER_CLK                                        20
+#define CAM_CC_CSI3PHYTIMER_CLK_SRC                            21
+#define CAM_CC_CSIPHY0_CLK                                     22
+#define CAM_CC_CSIPHY1_CLK                                     23
+#define CAM_CC_CSIPHY2_CLK                                     24
+#define CAM_CC_CSIPHY3_CLK                                     25
+#define CAM_CC_FAST_AHB_CLK_SRC                                        26
+#define CAM_CC_ICP_APB_CLK                                     27
+#define CAM_CC_ICP_ATB_CLK                                     28
+#define CAM_CC_ICP_CLK                                         29
+#define CAM_CC_ICP_CLK_SRC                                     30
+#define CAM_CC_ICP_CTI_CLK                                     31
+#define CAM_CC_ICP_TS_CLK                                      32
+#define CAM_CC_IFE_0_AXI_CLK                                   33
+#define CAM_CC_IFE_0_CLK                                       34
+#define CAM_CC_IFE_0_CLK_SRC                                   35
+#define CAM_CC_IFE_0_CPHY_RX_CLK                               36
+#define CAM_CC_IFE_0_CSID_CLK                                  37
+#define CAM_CC_IFE_0_CSID_CLK_SRC                              38
+#define CAM_CC_IFE_0_DSP_CLK                                   39
+#define CAM_CC_IFE_1_AXI_CLK                                   40
+#define CAM_CC_IFE_1_CLK                                       41
+#define CAM_CC_IFE_1_CLK_SRC                                   42
+#define CAM_CC_IFE_1_CPHY_RX_CLK                               43
+#define CAM_CC_IFE_1_CSID_CLK                                  44
+#define CAM_CC_IFE_1_CSID_CLK_SRC                              45
+#define CAM_CC_IFE_1_DSP_CLK                                   46
+#define CAM_CC_IFE_LITE_CLK                                    47
+#define CAM_CC_IFE_LITE_CLK_SRC                                        48
+#define CAM_CC_IFE_LITE_CPHY_RX_CLK                            49
+#define CAM_CC_IFE_LITE_CSID_CLK                               50
+#define CAM_CC_IFE_LITE_CSID_CLK_SRC                           51
+#define CAM_CC_IPE_0_AHB_CLK                                   52
+#define CAM_CC_IPE_0_AREG_CLK                                  53
+#define CAM_CC_IPE_0_AXI_CLK                                   54
+#define CAM_CC_IPE_0_CLK                                       55
+#define CAM_CC_IPE_0_CLK_SRC                                   56
+#define CAM_CC_JPEG_CLK                                                57
+#define CAM_CC_JPEG_CLK_SRC                                    58
+#define CAM_CC_LRME_CLK                                                59
+#define CAM_CC_LRME_CLK_SRC                                    60
+#define CAM_CC_MCLK0_CLK                                       61
+#define CAM_CC_MCLK0_CLK_SRC                                   62
+#define CAM_CC_MCLK1_CLK                                       63
+#define CAM_CC_MCLK1_CLK_SRC                                   64
+#define CAM_CC_MCLK2_CLK                                       65
+#define CAM_CC_MCLK2_CLK_SRC                                   66
+#define CAM_CC_MCLK3_CLK                                       67
+#define CAM_CC_MCLK3_CLK_SRC                                   68
+#define CAM_CC_MCLK4_CLK                                       69
+#define CAM_CC_MCLK4_CLK_SRC                                   70
+#define CAM_CC_BPS_AHB_CLK                                     71
+#define CAM_CC_BPS_AREG_CLK                                    72
+#define CAM_CC_BPS_AXI_CLK                                     73
+#define CAM_CC_BPS_CLK                                         74
+#define CAM_CC_BPS_CLK_SRC                                     75
+#define CAM_CC_SLOW_AHB_CLK_SRC                                        76
+#define CAM_CC_SOC_AHB_CLK                                     77
+#define CAM_CC_SYS_TMR_CLK                                     78
+
+/* CAM_CC power domains */
+#define BPS_GDSC                                               0
+#define IFE_0_GDSC                                             1
+#define IFE_1_GDSC                                             2
+#define IPE_0_GDSC                                             3
+#define TITAN_TOP_GDSC                                         4
+
+/* CAM_CC resets */
+#define CAM_CC_BPS_BCR                                         0
+#define CAM_CC_CAMNOC_BCR                                      1
+#define CAM_CC_CCI_0_BCR                                       2
+#define CAM_CC_CCI_1_BCR                                       3
+#define CAM_CC_CPAS_BCR                                                4
+#define CAM_CC_CSI0PHY_BCR                                     5
+#define CAM_CC_CSI1PHY_BCR                                     6
+#define CAM_CC_CSI2PHY_BCR                                     7
+#define CAM_CC_CSI3PHY_BCR                                     8
+#define CAM_CC_ICP_BCR                                         9
+#define CAM_CC_IFE_0_BCR                                       10
+#define CAM_CC_IFE_1_BCR                                       11
+#define CAM_CC_IFE_LITE_BCR                                    12
+#define CAM_CC_IPE_0_BCR                                       13
+#define CAM_CC_JPEG_BCR                                                14
+#define CAM_CC_LRME_BCR                                                15
+#define CAM_CC_MCLK0_BCR                                       16
+#define CAM_CC_MCLK1_BCR                                       17
+#define CAM_CC_MCLK2_BCR                                       18
+#define CAM_CC_MCLK3_BCR                                       19
+#define CAM_CC_MCLK4_BCR                                       20
+#define CAM_CC_TITAN_TOP_BCR                                   21
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,gcc-sdx55.h b/include/dt-bindings/clock/qcom,gcc-sdx55.h
new file mode 100644
index 0000000..fb9a594
--- /dev/null
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2020, Linaro Ltd.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SDX55_H
+#define _DT_BINDINGS_CLK_QCOM_GCC_SDX55_H
+
+#define GPLL0                                                  3
+#define GPLL0_OUT_EVEN                                         4
+#define GPLL4                                                  5
+#define GPLL4_OUT_EVEN                                         6
+#define GPLL5                                                  7
+#define GCC_AHB_PCIE_LINK_CLK                                  8
+#define GCC_BLSP1_AHB_CLK                                      9
+#define GCC_BLSP1_QUP1_I2C_APPS_CLK                            10
+#define GCC_BLSP1_QUP1_I2C_APPS_CLK_SRC                                11
+#define GCC_BLSP1_QUP1_SPI_APPS_CLK                            12
+#define GCC_BLSP1_QUP1_SPI_APPS_CLK_SRC                                13
+#define GCC_BLSP1_QUP2_I2C_APPS_CLK                            14
+#define GCC_BLSP1_QUP2_I2C_APPS_CLK_SRC                                15
+#define GCC_BLSP1_QUP2_SPI_APPS_CLK                            16
+#define GCC_BLSP1_QUP2_SPI_APPS_CLK_SRC                                17
+#define GCC_BLSP1_QUP3_I2C_APPS_CLK                            18
+#define GCC_BLSP1_QUP3_I2C_APPS_CLK_SRC                                19
+#define GCC_BLSP1_QUP3_SPI_APPS_CLK                            20
+#define GCC_BLSP1_QUP3_SPI_APPS_CLK_SRC                                21
+#define GCC_BLSP1_QUP4_I2C_APPS_CLK                            22
+#define GCC_BLSP1_QUP4_I2C_APPS_CLK_SRC                                23
+#define GCC_BLSP1_QUP4_SPI_APPS_CLK                            24
+#define GCC_BLSP1_QUP4_SPI_APPS_CLK_SRC                                25
+#define GCC_BLSP1_UART1_APPS_CLK                               26
+#define GCC_BLSP1_UART1_APPS_CLK_SRC                           27
+#define GCC_BLSP1_UART2_APPS_CLK                               28
+#define GCC_BLSP1_UART2_APPS_CLK_SRC                           29
+#define GCC_BLSP1_UART3_APPS_CLK                               30
+#define GCC_BLSP1_UART3_APPS_CLK_SRC                           31
+#define GCC_BLSP1_UART4_APPS_CLK                               32
+#define GCC_BLSP1_UART4_APPS_CLK_SRC                           33
+#define GCC_BOOT_ROM_AHB_CLK                                   34
+#define GCC_CE1_AHB_CLK                                                35
+#define GCC_CE1_AXI_CLK                                                36
+#define GCC_CE1_CLK                                            37
+#define GCC_CPUSS_AHB_CLK                                      38
+#define GCC_CPUSS_AHB_CLK_SRC                                  39
+#define GCC_CPUSS_GNOC_CLK                                     40
+#define GCC_CPUSS_RBCPR_CLK                                    41
+#define GCC_CPUSS_RBCPR_CLK_SRC                                        42
+#define GCC_EMAC_CLK_SRC                                       43
+#define GCC_EMAC_PTP_CLK_SRC                                   44
+#define GCC_ETH_AXI_CLK                                                45
+#define GCC_ETH_PTP_CLK                                                46
+#define GCC_ETH_RGMII_CLK                                      47
+#define GCC_ETH_SLAVE_AHB_CLK                                  48
+#define GCC_GP1_CLK                                            49
+#define GCC_GP1_CLK_SRC                                                50
+#define GCC_GP2_CLK                                            51
+#define GCC_GP2_CLK_SRC                                                52
+#define GCC_GP3_CLK                                            53
+#define GCC_GP3_CLK_SRC                                                54
+#define GCC_PCIE_0_CLKREF_CLK                                  55
+#define GCC_PCIE_AUX_CLK                                       56
+#define GCC_PCIE_AUX_PHY_CLK_SRC                               57
+#define GCC_PCIE_CFG_AHB_CLK                                   58
+#define GCC_PCIE_MSTR_AXI_CLK                                  59
+#define GCC_PCIE_PIPE_CLK                                      60
+#define GCC_PCIE_RCHNG_PHY_CLK                                 61
+#define GCC_PCIE_RCHNG_PHY_CLK_SRC                             62
+#define GCC_PCIE_SLEEP_CLK                                     63
+#define GCC_PCIE_SLV_AXI_CLK                                   64
+#define GCC_PCIE_SLV_Q2A_AXI_CLK                               65
+#define GCC_PDM2_CLK                                           66
+#define GCC_PDM2_CLK_SRC                                       67
+#define GCC_PDM_AHB_CLK                                                68
+#define GCC_PDM_XO4_CLK                                                69
+#define GCC_SDCC1_AHB_CLK                                      70
+#define GCC_SDCC1_APPS_CLK                                     71
+#define GCC_SDCC1_APPS_CLK_SRC                                 72
+#define GCC_SYS_NOC_CPUSS_AHB_CLK                              73
+#define GCC_USB30_MASTER_CLK                                   74
+#define GCC_USB30_MASTER_CLK_SRC                               75
+#define GCC_USB30_MOCK_UTMI_CLK                                        76
+#define GCC_USB30_MOCK_UTMI_CLK_SRC                            77
+#define GCC_USB30_MSTR_AXI_CLK                                 78
+#define GCC_USB30_SLEEP_CLK                                    79
+#define GCC_USB30_SLV_AHB_CLK                                  80
+#define GCC_USB3_PHY_AUX_CLK                                   81
+#define GCC_USB3_PHY_AUX_CLK_SRC                               82
+#define GCC_USB3_PHY_PIPE_CLK                                  83
+#define GCC_USB3_PRIM_CLKREF_CLK                               84
+#define GCC_USB_PHY_CFG_AHB2PHY_CLK                            85
+#define GCC_XO_DIV4_CLK                                                86
+#define GCC_XO_PCIE_LINK_CLK                                   87
+
+#define GCC_EMAC_BCR                                           0
+#define GCC_PCIE_BCR                                           1
+#define GCC_PCIE_LINK_DOWN_BCR                                 2
+#define GCC_PCIE_NOCSR_COM_PHY_BCR                             3
+#define GCC_PCIE_PHY_BCR                                       4
+#define GCC_PCIE_PHY_CFG_AHB_BCR                               5
+#define GCC_PCIE_PHY_COM_BCR                                   6
+#define GCC_PCIE_PHY_NOCSR_COM_PHY_BCR                         7
+#define GCC_PDM_BCR                                            8
+#define GCC_QUSB2PHY_BCR                                       9
+#define GCC_TCSR_PCIE_BCR                                      10
+#define GCC_USB30_BCR                                          11
+#define GCC_USB3_PHY_BCR                                       12
+#define GCC_USB3PHY_PHY_BCR                                    13
+#define GCC_USB_PHY_CFG_AHB2PHY_BCR                            14
+
+/* GCC power domains */
+#define USB30_GDSC                                             0
+#define PCIE_GDSC                                              1
+#define EMAC_GDSC                                              2
+
+#endif
index 2e6c54e..583a991 100644
 #define RPMH_IPA_CLK                           12
 #define RPMH_LN_BB_CLK1                                13
 #define RPMH_LN_BB_CLK1_A                      14
+#define RPMH_CE_CLK                            15
+#define RPMH_QPIC_CLK                          16
+#define RPMH_DIV_CLK1                          17
+#define RPMH_DIV_CLK1_A                                18
+#define RPMH_RF_CLK4                           19
+#define RPMH_RF_CLK4_A                         20
+#define RPMH_RF_CLK5                           21
+#define RPMH_RF_CLK5_A                         22
+#define RPMH_PKA_CLK                           23
+#define RPMH_HWKM_CLK                          24
 
 #endif
diff --git a/include/dt-bindings/clock/qcom,sm8250-lpass-aoncc.h b/include/dt-bindings/clock/qcom,sm8250-lpass-aoncc.h
new file mode 100644
index 0000000..f5a1cfa
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _DT_BINDINGS_CLK_LPASS_AONCC_SM8250_H
+#define _DT_BINDINGS_CLK_LPASS_AONCC_SM8250_H
+
+/* from AOCC */
+#define LPASS_CDC_VA_MCLK                              0
+#define LPASS_CDC_TX_NPL                               1
+#define LPASS_CDC_TX_MCLK                              2
+
+#endif /* _DT_BINDINGS_CLK_LPASS_AONCC_SM8250_H */
diff --git a/include/dt-bindings/clock/qcom,sm8250-lpass-audiocc.h b/include/dt-bindings/clock/qcom,sm8250-lpass-audiocc.h
new file mode 100644
index 0000000..a1aa6cb
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _DT_BINDINGS_CLK_LPASS_AUDIOCC_SM8250_H
+#define _DT_BINDINGS_CLK_LPASS_AUDIOCC_SM8250_H
+
+/* From AudioCC */
+#define LPASS_CDC_WSA_NPL                              0
+#define LPASS_CDC_WSA_MCLK                             1
+#define LPASS_CDC_RX_MCLK                              2
+#define LPASS_CDC_RX_NPL                               3
+#define LPASS_CDC_RX_MCLK_MCLK2                                4
+
+#endif /* _DT_BINDINGS_CLK_LPASS_AUDIOCC_SM8250_H */
diff --git a/include/dt-bindings/clock/sifive-fu740-prci.h b/include/dt-bindings/clock/sifive-fu740-prci.h
new file mode 100644
index 0000000..cd7706e
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Copyright (C) 2019 SiFive, Inc.
+ * Wesley Terpstra
+ * Paul Walmsley
+ * Zong Li
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_SIFIVE_FU740_PRCI_H
+#define __DT_BINDINGS_CLOCK_SIFIVE_FU740_PRCI_H
+
+/* Clock indexes for use by Device Tree data and the PRCI driver */
+
+#define PRCI_CLK_COREPLL              0
+#define PRCI_CLK_DDRPLL                       1
+#define PRCI_CLK_GEMGXLPLL            2
+#define PRCI_CLK_DVFSCOREPLL          3
+#define PRCI_CLK_HFPCLKPLL            4
+#define PRCI_CLK_CLTXPLL              5
+#define PRCI_CLK_TLCLK                6
+#define PRCI_CLK_PCLK                 7
+
+#endif /* __DT_BINDINGS_CLOCK_SIFIVE_FU740_PRCI_H */
index 0782b05..af0d958 100644
@@ -8,8 +8,8 @@
  * The second cell contains standard flag values specified in gpio.h.
  */
 
-#ifndef _DT_BINDINGS_GPIO_TEGRA_GPIO_H
-#define _DT_BINDINGS_GPIO_TEGRA_GPIO_H
+#ifndef _DT_BINDINGS_GPIO_TEGRA186_GPIO_H
+#define _DT_BINDINGS_GPIO_TEGRA186_GPIO_H
 
 #include <dt-bindings/gpio/gpio.h>
 
index 1d94acd..fc85f50 100644
@@ -24,13 +24,11 @@ struct kvm_pmu {
        int irq_num;
        struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
        DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
-       bool ready;
        bool created;
        bool irq_level;
        struct irq_work overflow_work;
 };
 
-#define kvm_arm_pmu_v3_ready(v)                ((v)->arch.pmu.ready)
 #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS)
 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
@@ -61,7 +59,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
 struct kvm_pmu {
 };
 
-#define kvm_arm_pmu_v3_ready(v)                (false)
 #define kvm_arm_pmu_irq_initialized(v) (false)
 static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
                                            u64 select_idx)
index a8d8fdc..3d74f10 100644
@@ -402,6 +402,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
                                 struct kvm_kernel_irq_routing_entry *irq_entry);
 
 int vgic_v4_load(struct kvm_vcpu *vcpu);
+void vgic_v4_commit(struct kvm_vcpu *vcpu);
 int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
 
 #endif /* __KVM_ARM_VGIC_H */
index 03a5de5..e431689 100644
@@ -639,6 +639,12 @@ struct clk_hw *__clk_hw_register_divider(struct device *dev,
                const struct clk_parent_data *parent_data, unsigned long flags,
                void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags,
                const struct clk_div_table *table, spinlock_t *lock);
+struct clk_hw *__devm_clk_hw_register_divider(struct device *dev,
+               struct device_node *np, const char *name,
+               const char *parent_name, const struct clk_hw *parent_hw,
+               const struct clk_parent_data *parent_data, unsigned long flags,
+               void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags,
+               const struct clk_div_table *table, spinlock_t *lock);
 struct clk *clk_register_divider_table(struct device *dev, const char *name,
                const char *parent_name, unsigned long flags,
                void __iomem *reg, u8 shift, u8 width,
@@ -779,6 +785,27 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name,
                                  (parent_data), (flags), (reg), (shift),     \
                                  (width), (clk_divider_flags), (table),      \
                                  (lock))
+/**
+ * devm_clk_hw_register_divider_table - register a table based divider clock
+ * with the clock framework (devres variant)
+ * @dev: device registering this clock
+ * @name: name of this clock
+ * @parent_name: name of clock's parent
+ * @flags: framework-specific flags
+ * @reg: register address to adjust divider
+ * @shift: number of bits to shift the bitfield
+ * @width: width of the bitfield
+ * @clk_divider_flags: divider-specific flags for this clock
+ * @table: array of divider/value pairs ending with a div set to 0
+ * @lock: shared register lock for this clock
+ */
+#define devm_clk_hw_register_divider_table(dev, name, parent_name, flags,     \
+                                          reg, shift, width,                 \
+                                          clk_divider_flags, table, lock)    \
+       __devm_clk_hw_register_divider((dev), NULL, (name), (parent_name),    \
+                                      NULL, NULL, (flags), (reg), (shift),   \
+                                      (width), (clk_divider_flags), (table), \
+                                      (lock))
 
 void clk_unregister_divider(struct clk *clk);
 void clk_hw_unregister_divider(struct clk_hw *hw);
@@ -1062,6 +1089,13 @@ struct clk_hw *clk_hw_register_composite_pdata(struct device *dev,
                struct clk_hw *rate_hw, const struct clk_ops *rate_ops,
                struct clk_hw *gate_hw, const struct clk_ops *gate_ops,
                unsigned long flags);
+struct clk_hw *devm_clk_hw_register_composite_pdata(struct device *dev,
+               const char *name, const struct clk_parent_data *parent_data,
+               int num_parents,
+               struct clk_hw *mux_hw, const struct clk_ops *mux_ops,
+               struct clk_hw *rate_hw, const struct clk_ops *rate_ops,
+               struct clk_hw *gate_hw, const struct clk_ops *gate_ops,
+               unsigned long flags);
 void clk_hw_unregister_composite(struct clk_hw *hw);
 
 struct clk *clk_register(struct device *dev, struct clk_hw *hw);
@@ -1088,6 +1122,11 @@ static inline struct clk_hw *__clk_get_hw(struct clk *clk)
        return (struct clk_hw *)clk;
 }
 #endif
+
+struct clk *clk_hw_get_clk(struct clk_hw *hw, const char *con_id);
+struct clk *devm_clk_hw_get_clk(struct device *dev, struct clk_hw *hw,
+                               const char *con_id);
+
 unsigned int clk_hw_get_num_parents(const struct clk_hw *hw);
 struct clk_hw *clk_hw_get_parent(const struct clk_hw *hw);
 struct clk_hw *clk_hw_get_parent_by_index(const struct clk_hw *hw,
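
clk_hw_get_clk() and devm_clk_hw_get_clk() let a provider turn one of its own struct clk_hw objects into an ordinary consumer handle, so it can drive its own clocks through the consumer API. A hedged sketch (dev, hw, and the con_id string are illustrative):

    struct clk *clk = devm_clk_hw_get_clk(dev, hw, "foo-internal");

    if (IS_ERR(clk))
            return PTR_ERR(clk);

    /* Treat our own clk_hw like any other consumer clock. */
    clk_prepare_enable(clk);
    clk_set_rate(clk, 100000000);
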
index 7fd6a1f..31ff1bf 100644 (file)
@@ -110,6 +110,17 @@ int clk_notifier_register(struct clk *clk, struct notifier_block *nb);
 int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb);
 
 /**
+ * devm_clk_notifier_register - register a managed rate-change notifier callback
+ * @dev: device for clock "consumer"
+ * @clk: clock whose rate we are interested in
+ * @nb: notifier block with callback function pointer
+ *
+ * Returns 0 on success, or a negative errno otherwise
+ */
+int devm_clk_notifier_register(struct device *dev, struct clk *clk,
+                              struct notifier_block *nb);
+
+/**
  * clk_get_accuracy - obtain the clock accuracy in ppb (parts per billion)
  *                   for a clock source.
  * @clk: clock source
@@ -150,7 +161,7 @@ int clk_get_phase(struct clk *clk);
 int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den);
 
 /**
- * clk_get_duty_cycle - return the duty cycle ratio of a clock signal
+ * clk_get_scaled_duty_cycle - return the duty cycle ratio of a clock signal
  * @clk: clock signal source
  * @scale: scaling factor to be applied to represent the ratio as an integer
  *
@@ -186,6 +197,13 @@ static inline int clk_notifier_unregister(struct clk *clk,
        return -ENOTSUPP;
 }
 
+static inline int devm_clk_notifier_register(struct device *dev,
+                                            struct clk *clk,
+                                            struct notifier_block *nb)
+{
+       return -ENOTSUPP;
+}
+
 static inline long clk_get_accuracy(struct clk *clk)
 {
        return -ENOTSUPP;
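
The managed variant follows the existing clk_notifier_register() contract; only the registration call and its automatic teardown are new. A sketch, with dev and clk assumed to come from the surrounding driver:

    static int foo_clk_rate_cb(struct notifier_block *nb,
                               unsigned long event, void *data)
    {
            struct clk_notifier_data *ndata = data;

            if (event == POST_RATE_CHANGE)
                    pr_debug("rate %lu -> %lu\n",
                             ndata->old_rate, ndata->new_rate);
            return NOTIFY_OK;
    }

    static struct notifier_block foo_clk_nb = {
            .notifier_call = foo_clk_rate_cb,
    };

    /* In probe; unregistered automatically when the device goes away. */
    ret = devm_clk_notifier_register(dev, clk, &foo_clk_nb);
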
index 79097e3..38b7740 100644 (file)
@@ -10,7 +10,7 @@
 
 struct device_node;
 
-#ifdef CONFIG_ARCH_S3C64XX
+#ifdef CONFIG_S3C64XX_COMMON_CLK
 void s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f,
                      unsigned long xusbxti_f, bool s3c6400,
                      void __iomem *base);
@@ -19,7 +19,7 @@ static inline void s3c64xx_clk_init(struct device_node *np,
                                    unsigned long xtal_f,
                                    unsigned long xusbxti_f,
                                    bool s3c6400, void __iomem *base) { }
-#endif /* CONFIG_ARCH_S3C64XX */
+#endif /* CONFIG_S3C64XX_COMMON_CLK */
 
 #ifdef CONFIG_S3C2410_COMMON_CLK
 void s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f,
index 400c094..6e65be7 100644 (file)
@@ -537,6 +537,12 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
                        int maxevents, int timeout,
                        const compat_sigset_t __user *sigmask,
                        compat_size_t sigsetsize);
+asmlinkage long compat_sys_epoll_pwait2(int epfd,
+                       struct epoll_event __user *events,
+                       int maxevents,
+                       const struct __kernel_timespec __user *timeout,
+                       const compat_sigset_t __user *sigmask,
+                       compat_size_t sigsetsize);
 
 /* fs/fcntl.c */
 asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
index 584fccd..9c8b743 100644 (file)
@@ -320,6 +320,15 @@ struct cpufreq_driver {
                                        unsigned int index);
        unsigned int    (*fast_switch)(struct cpufreq_policy *policy,
                                       unsigned int target_freq);
+       /*
+        * ->fast_switch() replacement for drivers that use an internal
+        * representation of performance levels and can pass hints other than
+        * the target performance level to the hardware.
+        */
+       void            (*adjust_perf)(unsigned int cpu,
+                                      unsigned long min_perf,
+                                      unsigned long target_perf,
+                                      unsigned long capacity);
 
        /*
         * Caches and returns the lowest driver-supported frequency greater than
@@ -588,6 +597,11 @@ struct cpufreq_governor {
 /* Pass a target to the cpufreq driver */
 unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
                                        unsigned int target_freq);
+void cpufreq_driver_adjust_perf(unsigned int cpu,
+                               unsigned long min_perf,
+                               unsigned long target_perf,
+                               unsigned long capacity);
+bool cpufreq_driver_has_adjust_perf(void);
 int cpufreq_driver_target(struct cpufreq_policy *policy,
                                 unsigned int target_freq,
                                 unsigned int relation);
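
A driver opts in by filling .adjust_perf instead of .fast_switch; the governor then calls cpufreq_driver_adjust_perf() whenever cpufreq_driver_has_adjust_perf() is true. A hypothetical callback, with the foo_* accessors standing in for a real performance-register interface:

    static void foo_cpufreq_adjust_perf(unsigned int cpu,
                                        unsigned long min_perf,
                                        unsigned long target_perf,
                                        unsigned long capacity)
    {
            /*
             * min_perf/target_perf are hints on the 0..capacity scale;
             * rescale them into this driver's performance-level range
             * before writing the hardware request.
             */
            unsigned long lowest = foo_perf_lowest(cpu);
            unsigned long highest = foo_perf_highest(cpu);
            unsigned long min, des;

            min = lowest + div_u64((u64)(highest - lowest) * min_perf,
                                   capacity);
            des = lowest + div_u64((u64)(highest - lowest) * target_perf,
                                   capacity);
            foo_write_perf_request(cpu, min, des);
    }
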
index 583a3a1..278d489 100644 (file)
@@ -122,7 +122,7 @@ struct dma_buf_map {
 
 /**
  * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory
- * @vaddr    A system-memory address
+ * @vaddr_:    A system-memory address
  */
 #define DMA_BUF_MAP_INIT_VADDR(vaddr_) \
        { \
index 03925e4..70fcd0f 100644 (file)
@@ -317,6 +317,20 @@ static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size)
 void *arch_dma_set_uncached(void *addr, size_t size);
 void arch_dma_clear_uncached(void *addr, size_t size);
 
+#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT
+bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr);
+bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle);
+bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg,
+               int nents);
+bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg,
+               int nents);
+#else
+#define arch_dma_map_page_direct(d, a)         (false)
+#define arch_dma_unmap_page_direct(d, a)       (false)
+#define arch_dma_map_sg_direct(d, s, n)                (false)
+#define arch_dma_unmap_sg_direct(d, s, n)      (false)
+#endif
+
 #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                const struct iommu_ops *iommu, bool coherent);
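
These hooks let an architecture claim individual mappings for the direct path even when the device has IOMMU-backed dma_ops; ARCH_HAS_DMA_MAP_DIRECT is selected by powerpc for its persistent-memory case. The expected call-site shape, a sketch assuming the dma_map_page_attrs() fast path in kernel/dma/mapping.c:

    if (dma_map_direct(dev, ops) ||
        arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size))
            addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
    else
            addr = ops->map_page(dev, page, offset, size, dir, attrs);

With CONFIG_ARCH_HAS_DMA_MAP_DIRECT unset, the stub macros fold to false and the extra test disappears.
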
index dc4fd8a..fa0a524 100644 (file)
@@ -41,6 +41,7 @@ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
 __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
 int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
                                  __u64 *cnt);
+void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
 
 DECLARE_PER_CPU(int, eventfd_wake_count);
 
@@ -82,6 +83,11 @@ static inline bool eventfd_signal_count(void)
        return false;
 }
 
+static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+}
+
 #endif
 
 #endif /* _LINUX_EVENTFD_H */
index 901aab8..ef49307 100644 (file)
@@ -158,7 +158,7 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
                                       unsigned long *value_bitmap);
 
 int gpiod_set_config(struct gpio_desc *desc, unsigned long config);
-int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce);
+int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce);
 int gpiod_set_transitory(struct gpio_desc *desc, bool transitory);
 void gpiod_toggle_active_low(struct gpio_desc *desc);
 
@@ -481,7 +481,7 @@ static inline int gpiod_set_config(struct gpio_desc *desc, unsigned long config)
        return -ENOSYS;
 }
 
-static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
+static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce)
 {
        /* GPIO can never have been requested */
        WARN_ON(desc);
index 4a7e295..286de05 100644 (file)
@@ -621,83 +621,12 @@ int gpiochip_irq_domain_activate(struct irq_domain *domain,
 void gpiochip_irq_domain_deactivate(struct irq_domain *domain,
                                    struct irq_data *data);
 
-void gpiochip_set_nested_irqchip(struct gpio_chip *gc,
-               struct irq_chip *irqchip,
-               unsigned int parent_irq);
-
-int gpiochip_irqchip_add_key(struct gpio_chip *gc,
-                            struct irq_chip *irqchip,
-                            unsigned int first_irq,
-                            irq_flow_handler_t handler,
-                            unsigned int type,
-                            bool threaded,
-                            struct lock_class_key *lock_key,
-                            struct lock_class_key *request_key);
-
 bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc,
                                unsigned int offset);
 
 int gpiochip_irqchip_add_domain(struct gpio_chip *gc,
                                struct irq_domain *domain);
 
-#ifdef CONFIG_LOCKDEP
-
-/*
- * Lockdep requires that each irqchip instance be created with a
- * unique key so as to avoid unnecessary warnings. This upfront
- * boilerplate static inlines provides such a key for each
- * unique instance.
- */
-static inline int gpiochip_irqchip_add(struct gpio_chip *gc,
-                                      struct irq_chip *irqchip,
-                                      unsigned int first_irq,
-                                      irq_flow_handler_t handler,
-                                      unsigned int type)
-{
-       static struct lock_class_key lock_key;
-       static struct lock_class_key request_key;
-
-       return gpiochip_irqchip_add_key(gc, irqchip, first_irq,
-                                       handler, type, false,
-                                       &lock_key, &request_key);
-}
-
-static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gc,
-                         struct irq_chip *irqchip,
-                         unsigned int first_irq,
-                         irq_flow_handler_t handler,
-                         unsigned int type)
-{
-
-       static struct lock_class_key lock_key;
-       static struct lock_class_key request_key;
-
-       return gpiochip_irqchip_add_key(gc, irqchip, first_irq,
-                                       handler, type, true,
-                                       &lock_key, &request_key);
-}
-#else /* ! CONFIG_LOCKDEP */
-static inline int gpiochip_irqchip_add(struct gpio_chip *gc,
-                                      struct irq_chip *irqchip,
-                                      unsigned int first_irq,
-                                      irq_flow_handler_t handler,
-                                      unsigned int type)
-{
-       return gpiochip_irqchip_add_key(gc, irqchip, first_irq,
-                                       handler, type, false, NULL, NULL);
-}
-
-static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gc,
-                         struct irq_chip *irqchip,
-                         unsigned int first_irq,
-                         irq_flow_handler_t handler,
-                         unsigned int type)
-{
-       return gpiochip_irqchip_add_key(gc, irqchip, first_irq,
-                                       handler, type, true, NULL, NULL);
-}
-#endif /* CONFIG_LOCKDEP */
-
 int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset);
 void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset);
 int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset,
index 6976b83..943c341 100644 (file)
@@ -39,6 +39,8 @@ struct its_vpe {
        irq_hw_number_t         vpe_db_lpi;
        /* VPE resident */
        bool                    resident;
+       /* VPT parse complete */
+       bool                    ready;
        union {
                /* GICv4.0 implementations */
                struct {
@@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type {
        PROP_UPDATE_AND_INV_VLPI,
        SCHEDULE_VPE,
        DESCHEDULE_VPE,
+       COMMIT_VPE,
        INVALL_VPE,
        PROP_UPDATE_VSGI,
 };
@@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm);
 void its_free_vcpu_irqs(struct its_vm *vm);
 int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en);
 int its_make_vpe_non_resident(struct its_vpe *vpe, bool db);
+int its_commit_vpe(struct its_vpe *vpe);
 int its_invall_vpe(struct its_vpe *vpe);
 int its_map_vlpi(int irq, struct its_vlpi_map *map);
 int its_get_vlpi(int irq, struct its_vlpi_map *map);
index ac6aba6..ca5e89f 100644 (file)
@@ -9,7 +9,7 @@
  * even in compilation units that selectively disable KASAN, but must use KASAN
 * to validate access to an address. Never use these in header files!
  */
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 bool __kasan_check_read(const volatile void *p, unsigned int size);
 bool __kasan_check_write(const volatile void *p, unsigned int size);
 #else
index 30d343b..5e0655f 100644 (file)
@@ -2,6 +2,7 @@
 #ifndef _LINUX_KASAN_H
 #define _LINUX_KASAN_H
 
+#include <linux/static_key.h>
 #include <linux/types.h>
 
 struct kmem_cache;
@@ -11,7 +12,7 @@ struct task_struct;
 
 #ifdef CONFIG_KASAN
 
-#include <linux/pgtable.h>
+#include <linux/linkage.h>
 #include <asm/kasan.h>
 
 /* kasan_data struct is used in KUnit tests for KASAN expected failures */
@@ -20,6 +21,20 @@ struct kunit_kasan_expectation {
        bool report_found;
 };
 
+#endif
+
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+
+#include <linux/pgtable.h>
+
+/* Software KASAN implementations use shadow memory. */
+
+#ifdef CONFIG_KASAN_SW_TAGS
+#define KASAN_SHADOW_INIT 0xFF
+#else
+#define KASAN_SHADOW_INIT 0
+#endif
+
 extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
 extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE];
 extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD];
@@ -35,88 +50,219 @@ static inline void *kasan_mem_to_shadow(const void *addr)
                + KASAN_SHADOW_OFFSET;
 }
 
+int kasan_add_zero_shadow(void *start, unsigned long size);
+void kasan_remove_zero_shadow(void *start, unsigned long size);
+
 /* Enable reporting bugs after kasan_disable_current() */
 extern void kasan_enable_current(void);
 
 /* Disable reporting bugs for current task */
 extern void kasan_disable_current(void);
 
-void kasan_unpoison_shadow(const void *address, size_t size);
-
-void kasan_unpoison_task_stack(struct task_struct *task);
-
-void kasan_alloc_pages(struct page *page, unsigned int order);
-void kasan_free_pages(struct page *page, unsigned int order);
+#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
 
-void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
-                       slab_flags_t *flags);
+static inline int kasan_add_zero_shadow(void *start, unsigned long size)
+{
+       return 0;
+}
+static inline void kasan_remove_zero_shadow(void *start,
+                                       unsigned long size)
+{}
 
-void kasan_poison_slab(struct page *page);
-void kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
-void kasan_poison_object_data(struct kmem_cache *cache, void *object);
-void * __must_check kasan_init_slab_obj(struct kmem_cache *cache,
-                                       const void *object);
+static inline void kasan_enable_current(void) {}
+static inline void kasan_disable_current(void) {}
 
-void * __must_check kasan_kmalloc_large(const void *ptr, size_t size,
-                                               gfp_t flags);
-void kasan_kfree_large(void *ptr, unsigned long ip);
-void kasan_poison_kfree(void *ptr, unsigned long ip);
-void * __must_check kasan_kmalloc(struct kmem_cache *s, const void *object,
-                                       size_t size, gfp_t flags);
-void * __must_check kasan_krealloc(const void *object, size_t new_size,
-                                       gfp_t flags);
+#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
 
-void * __must_check kasan_slab_alloc(struct kmem_cache *s, void *object,
-                                       gfp_t flags);
-bool kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip);
+#ifdef CONFIG_KASAN
 
 struct kasan_cache {
        int alloc_meta_offset;
        int free_meta_offset;
 };
 
-/*
- * These functions provide a special case to support backing module
- * allocations with real shadow memory. With KASAN vmalloc, the special
- * case is unnecessary, as the work is handled in the generic case.
- */
-#ifndef CONFIG_KASAN_VMALLOC
-int kasan_module_alloc(void *addr, size_t size);
-void kasan_free_shadow(const struct vm_struct *vm);
-#else
-static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
-static inline void kasan_free_shadow(const struct vm_struct *vm) {}
-#endif
+#ifdef CONFIG_KASAN_HW_TAGS
 
-int kasan_add_zero_shadow(void *start, unsigned long size);
-void kasan_remove_zero_shadow(void *start, unsigned long size);
+DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled);
 
-size_t __ksize(const void *);
-static inline void kasan_unpoison_slab(const void *ptr)
+static __always_inline bool kasan_enabled(void)
 {
-       kasan_unpoison_shadow(ptr, __ksize(ptr));
+       return static_branch_likely(&kasan_flag_enabled);
 }
-size_t kasan_metadata_size(struct kmem_cache *cache);
 
-bool kasan_save_enable_multi_shot(void);
-void kasan_restore_multi_shot(bool enabled);
+#else /* CONFIG_KASAN_HW_TAGS */
 
-#else /* CONFIG_KASAN */
+static inline bool kasan_enabled(void)
+{
+       return true;
+}
 
-static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
+#endif /* CONFIG_KASAN_HW_TAGS */
 
-static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+slab_flags_t __kasan_never_merge(void);
+static __always_inline slab_flags_t kasan_never_merge(void)
+{
+       if (kasan_enabled())
+               return __kasan_never_merge();
+       return 0;
+}
 
-static inline void kasan_enable_current(void) {}
-static inline void kasan_disable_current(void) {}
+void __kasan_unpoison_range(const void *addr, size_t size);
+static __always_inline void kasan_unpoison_range(const void *addr, size_t size)
+{
+       if (kasan_enabled())
+               __kasan_unpoison_range(addr, size);
+}
+
+void __kasan_alloc_pages(struct page *page, unsigned int order);
+static __always_inline void kasan_alloc_pages(struct page *page,
+                                               unsigned int order)
+{
+       if (kasan_enabled())
+               __kasan_alloc_pages(page, order);
+}
+
+void __kasan_free_pages(struct page *page, unsigned int order);
+static __always_inline void kasan_free_pages(struct page *page,
+                                               unsigned int order)
+{
+       if (kasan_enabled())
+               __kasan_free_pages(page, order);
+}
+
+void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
+                               slab_flags_t *flags);
+static __always_inline void kasan_cache_create(struct kmem_cache *cache,
+                               unsigned int *size, slab_flags_t *flags)
+{
+       if (kasan_enabled())
+               __kasan_cache_create(cache, size, flags);
+}
+
+size_t __kasan_metadata_size(struct kmem_cache *cache);
+static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
+{
+       if (kasan_enabled())
+               return __kasan_metadata_size(cache);
+       return 0;
+}
+
+void __kasan_poison_slab(struct page *page);
+static __always_inline void kasan_poison_slab(struct page *page)
+{
+       if (kasan_enabled())
+               __kasan_poison_slab(page);
+}
+
+void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
+static __always_inline void kasan_unpoison_object_data(struct kmem_cache *cache,
+                                                       void *object)
+{
+       if (kasan_enabled())
+               __kasan_unpoison_object_data(cache, object);
+}
+
+void __kasan_poison_object_data(struct kmem_cache *cache, void *object);
+static __always_inline void kasan_poison_object_data(struct kmem_cache *cache,
+                                                       void *object)
+{
+       if (kasan_enabled())
+               __kasan_poison_object_data(cache, object);
+}
+
+void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
+                                         const void *object);
+static __always_inline void * __must_check kasan_init_slab_obj(
+                               struct kmem_cache *cache, const void *object)
+{
+       if (kasan_enabled())
+               return __kasan_init_slab_obj(cache, object);
+       return (void *)object;
+}
+
+bool __kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip);
+static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object,
+                                               unsigned long ip)
+{
+       if (kasan_enabled())
+               return __kasan_slab_free(s, object, ip);
+       return false;
+}
+
+void __kasan_slab_free_mempool(void *ptr, unsigned long ip);
+static __always_inline void kasan_slab_free_mempool(void *ptr, unsigned long ip)
+{
+       if (kasan_enabled())
+               __kasan_slab_free_mempool(ptr, ip);
+}
 
+void * __must_check __kasan_slab_alloc(struct kmem_cache *s,
+                                      void *object, gfp_t flags);
+static __always_inline void * __must_check kasan_slab_alloc(
+                               struct kmem_cache *s, void *object, gfp_t flags)
+{
+       if (kasan_enabled())
+               return __kasan_slab_alloc(s, object, flags);
+       return object;
+}
+
+void * __must_check __kasan_kmalloc(struct kmem_cache *s, const void *object,
+                                   size_t size, gfp_t flags);
+static __always_inline void * __must_check kasan_kmalloc(struct kmem_cache *s,
+                               const void *object, size_t size, gfp_t flags)
+{
+       if (kasan_enabled())
+               return __kasan_kmalloc(s, object, size, flags);
+       return (void *)object;
+}
+
+void * __must_check __kasan_kmalloc_large(const void *ptr,
+                                         size_t size, gfp_t flags);
+static __always_inline void * __must_check kasan_kmalloc_large(const void *ptr,
+                                                     size_t size, gfp_t flags)
+{
+       if (kasan_enabled())
+               return __kasan_kmalloc_large(ptr, size, flags);
+       return (void *)ptr;
+}
+
+void * __must_check __kasan_krealloc(const void *object,
+                                    size_t new_size, gfp_t flags);
+static __always_inline void * __must_check kasan_krealloc(const void *object,
+                                                size_t new_size, gfp_t flags)
+{
+       if (kasan_enabled())
+               return __kasan_krealloc(object, new_size, flags);
+       return (void *)object;
+}
+
+void __kasan_kfree_large(void *ptr, unsigned long ip);
+static __always_inline void kasan_kfree_large(void *ptr, unsigned long ip)
+{
+       if (kasan_enabled())
+               __kasan_kfree_large(ptr, ip);
+}
+
+bool kasan_save_enable_multi_shot(void);
+void kasan_restore_multi_shot(bool enabled);
+
+#else /* CONFIG_KASAN */
+
+static inline bool kasan_enabled(void)
+{
+       return false;
+}
+static inline slab_flags_t kasan_never_merge(void)
+{
+       return 0;
+}
+static inline void kasan_unpoison_range(const void *address, size_t size) {}
 static inline void kasan_alloc_pages(struct page *page, unsigned int order) {}
 static inline void kasan_free_pages(struct page *page, unsigned int order) {}
-
 static inline void kasan_cache_create(struct kmem_cache *cache,
                                      unsigned int *size,
                                      slab_flags_t *flags) {}
-
+static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
 static inline void kasan_poison_slab(struct page *page) {}
 static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
                                        void *object) {}
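
The mechanical shape of the hunks above is the point of the rework: each hook becomes an out-of-line __kasan_*() implementation plus an __always_inline wrapper gated on kasan_enabled(), so HW_TAGS kernels can switch all instrumentation off through the kasan_flag_enabled static branch while generic/SW_TAGS builds constant-fold the check to true. The pattern in isolation, with generic names:

    /* Out-of-line implementation, built only with KASAN enabled. */
    void __foo_hook(const void *addr, size_t size);

    /* Static branch under HW_TAGS, constant 'true' otherwise. */
    static __always_inline void foo_hook(const void *addr, size_t size)
    {
            if (kasan_enabled())
                    __foo_hook(addr, size);
    }
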
@@ -127,54 +273,42 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache,
 {
        return (void *)object;
 }
-
-static inline void *kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags)
+static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
+                                  unsigned long ip)
 {
-       return ptr;
+       return false;
+}
+static inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) {}
+static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object,
+                                  gfp_t flags)
+{
+       return object;
 }
-static inline void kasan_kfree_large(void *ptr, unsigned long ip) {}
-static inline void kasan_poison_kfree(void *ptr, unsigned long ip) {}
 static inline void *kasan_kmalloc(struct kmem_cache *s, const void *object,
                                size_t size, gfp_t flags)
 {
        return (void *)object;
 }
+static inline void *kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags)
+{
+       return (void *)ptr;
+}
 static inline void *kasan_krealloc(const void *object, size_t new_size,
                                 gfp_t flags)
 {
        return (void *)object;
 }
-
-static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object,
-                                  gfp_t flags)
-{
-       return object;
-}
-static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
-                                  unsigned long ip)
-{
-       return false;
-}
-
-static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
-static inline void kasan_free_shadow(const struct vm_struct *vm) {}
-
-static inline int kasan_add_zero_shadow(void *start, unsigned long size)
-{
-       return 0;
-}
-static inline void kasan_remove_zero_shadow(void *start,
-                                       unsigned long size)
-{}
-
-static inline void kasan_unpoison_slab(const void *ptr) { }
-static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
+static inline void kasan_kfree_large(void *ptr, unsigned long ip) {}
 
 #endif /* CONFIG_KASAN */
 
-#ifdef CONFIG_KASAN_GENERIC
+#if defined(CONFIG_KASAN) && CONFIG_KASAN_STACK
+void kasan_unpoison_task_stack(struct task_struct *task);
+#else
+static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+#endif
 
-#define KASAN_SHADOW_INIT 0
+#ifdef CONFIG_KASAN_GENERIC
 
 void kasan_cache_shrink(struct kmem_cache *cache);
 void kasan_cache_shutdown(struct kmem_cache *cache);
@@ -188,36 +322,50 @@ static inline void kasan_record_aux_stack(void *ptr) {}
 
 #endif /* CONFIG_KASAN_GENERIC */
 
-#ifdef CONFIG_KASAN_SW_TAGS
-
-#define KASAN_SHADOW_INIT 0xFF
-
-void kasan_init_tags(void);
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
 
-void *kasan_reset_tag(const void *addr);
+static inline void *kasan_reset_tag(const void *addr)
+{
+       return (void *)arch_kasan_reset_tag(addr);
+}
 
 bool kasan_report(unsigned long addr, size_t size,
                bool is_write, unsigned long ip);
 
-#else /* CONFIG_KASAN_SW_TAGS */
-
-static inline void kasan_init_tags(void) { }
+#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
 
 static inline void *kasan_reset_tag(const void *addr)
 {
        return (void *)addr;
 }
 
-#endif /* CONFIG_KASAN_SW_TAGS */
+#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
+
+#ifdef CONFIG_KASAN_SW_TAGS
+void __init kasan_init_sw_tags(void);
+#else
+static inline void kasan_init_sw_tags(void) { }
+#endif
+
+#ifdef CONFIG_KASAN_HW_TAGS
+void kasan_init_hw_tags_cpu(void);
+void __init kasan_init_hw_tags(void);
+#else
+static inline void kasan_init_hw_tags_cpu(void) { }
+static inline void kasan_init_hw_tags(void) { }
+#endif
 
 #ifdef CONFIG_KASAN_VMALLOC
+
 int kasan_populate_vmalloc(unsigned long addr, unsigned long size);
 void kasan_poison_vmalloc(const void *start, unsigned long size);
 void kasan_unpoison_vmalloc(const void *start, unsigned long size);
 void kasan_release_vmalloc(unsigned long start, unsigned long end,
                           unsigned long free_region_start,
                           unsigned long free_region_end);
-#else
+
+#else /* CONFIG_KASAN_VMALLOC */
+
 static inline int kasan_populate_vmalloc(unsigned long start,
                                        unsigned long size)
 {
@@ -232,7 +380,26 @@ static inline void kasan_release_vmalloc(unsigned long start,
                                         unsigned long end,
                                         unsigned long free_region_start,
                                         unsigned long free_region_end) {}
-#endif
+
+#endif /* CONFIG_KASAN_VMALLOC */
+
+#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
+               !defined(CONFIG_KASAN_VMALLOC)
+
+/*
+ * These functions provide a special case to support backing module
+ * allocations with real shadow memory. With KASAN vmalloc, the special
+ * case is unnecessary, as the work is handled in the generic case.
+ */
+int kasan_module_alloc(void *addr, size_t size);
+void kasan_free_shadow(const struct vm_struct *vm);
+
+#else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
+
+static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
+static inline void kasan_free_shadow(const struct vm_struct *vm) {}
+
+#endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */
 
 #ifdef CONFIG_KASAN_INLINE
 void kasan_non_canonical_hook(unsigned long addr);
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
new file mode 100644 (file)
index 0000000..120e5e9
--- /dev/null
@@ -0,0 +1,103 @@
+#ifndef KVM_DIRTY_RING_H
+#define KVM_DIRTY_RING_H
+
+#include <linux/kvm.h>
+
+/**
+ * struct kvm_dirty_ring - KVM internal dirty ring structure
+ *
+ * @dirty_index: free running counter that points to the next slot in
+ *               dirty_ring->dirty_gfns, where a new dirty page should go
+ * @reset_index: free running counter that points to the next dirty page
+ *               in dirty_ring->dirty_gfns for which the dirty trap needs
+ *               to be re-enabled
+ * @size:        size of the compact list, dirty_ring->dirty_gfns
+ * @soft_limit:  when the number of dirty pages in the list reaches this
+ *               limit, the vcpu that owns this ring should exit to userspace
+ *               to allow userspace to harvest all the dirty pages
+ * @dirty_gfns:  the array to keep the dirty gfns
+ * @index:       index of this dirty ring
+ */
+struct kvm_dirty_ring {
+       u32 dirty_index;
+       u32 reset_index;
+       u32 size;
+       u32 soft_limit;
+       struct kvm_dirty_gfn *dirty_gfns;
+       int index;
+};
+
+#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0)
+/*
+ * If KVM_DIRTY_LOG_PAGE_OFFSET is not defined, kvm_dirty_ring.o is not
+ * built either, so define these nop functions for the arch.
+ */
+static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
+{
+       return 0;
+}
+
+static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
+                                      int index, u32 size)
+{
+       return 0;
+}
+
+static inline struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+{
+       return NULL;
+}
+
+static inline int kvm_dirty_ring_reset(struct kvm *kvm,
+                                      struct kvm_dirty_ring *ring)
+{
+       return 0;
+}
+
+static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring,
+                                      u32 slot, u64 offset)
+{
+}
+
+static inline struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring,
+                                                  u32 offset)
+{
+       return NULL;
+}
+
+static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
+{
+}
+
+static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+{
+       return true;
+}
+
+#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+
+u32 kvm_dirty_ring_get_rsvd_entries(void);
+int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm);
+
+/*
+ * Called with kvm->slots_lock held; returns the number of
+ * processed pages.
+ */
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
+
+/*
+ * Push one dirty gfn (slot, offset) onto the ring. Callers watch
+ * kvm_dirty_ring_soft_full() to know when to exit and let userspace harvest.
+ */
+void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset);
+
+/* for use in vm_operations_struct */
+struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);
+
+void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
+bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);
+
+#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+
+#endif /* KVM_DIRTY_RING_H */
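
Given the free-running counters documented above, the ring bookkeeping is plain modular arithmetic. A sketch of what the out-of-line helpers are expected to compute; the real bodies live in virt/kvm/dirty_ring.c, so treat this as illustration:

    /* Entries pushed by the vcpu but not yet harvested by userspace. */
    static inline u32 dirty_ring_used(const struct kvm_dirty_ring *ring)
    {
            return ring->dirty_index - ring->reset_index; /* wraps safely */
    }

    /* Soft-full: the owning vcpu should exit so userspace can harvest. */
    static inline bool dirty_ring_is_soft_full(const struct kvm_dirty_ring *ring)
    {
            return dirty_ring_used(ring) >= ring->soft_limit;
    }

    /* Slot that kvm_dirty_ring_push() fills next ('size' a power of 2). */
    static inline u32 dirty_ring_next_slot(const struct kvm_dirty_ring *ring)
    {
            return ring->dirty_index & (ring->size - 1);
    }
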
index 7f2e2a0..f3b1013 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/kvm_types.h>
 
 #include <asm/kvm_host.h>
+#include <linux/kvm_dirty_ring.h>
 
 #ifndef KVM_MAX_VCPU_ID
 #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
@@ -319,6 +320,7 @@ struct kvm_vcpu {
        bool preempted;
        bool ready;
        struct kvm_vcpu_arch arch;
+       struct kvm_dirty_ring dirty_ring;
 };
 
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
@@ -349,6 +351,11 @@ struct kvm_memory_slot {
        u16 as_id;
 };
 
+static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot)
+{
+       return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
+}
+
 static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
 {
        return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
@@ -505,6 +512,7 @@ struct kvm {
        struct srcu_struct irq_srcu;
        pid_t userspace_pid;
        unsigned int max_halt_poll_ns;
+       u32 dirty_ring_size;
 };
 
 #define kvm_err(fmt, ...) \
@@ -792,13 +800,12 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                        offset_in_page(__gpa), v);                      \
 })
 
-int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
+void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
@@ -1478,4 +1485,14 @@ static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
 }
 #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */
 
+/*
+ * This defines how many reserved entries we want to keep before we
+ * kick the vcpu out to userspace to avoid the dirty ring filling up.
+ * This value can be tuned higher if e.g. PML is enabled on the host.
+ */
+#define  KVM_DIRTY_RING_RSVD_ENTRIES  64
+
+/* Max number of entries allowed for each kvm dirty ring */
+#define  KVM_DIRTY_RING_MAX_ENTRIES  65536
+
 #endif
index 08ed57e..d827bd7 100644 (file)
@@ -620,9 +620,10 @@ mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
 /**
  * mem_cgroup_lruvec - get the lru list vector for a memcg & node
  * @memcg: memcg of the wanted lruvec
+ * @pgdat: pglist_data
  *
  * Returns the lru list vector holding pages for a given @memcg &
- * @node combination. This can be the node lruvec, if the memory
+ * @pgdat combination. This can be the node lruvec, if the memory
  * controller is disabled.
  */
 static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
@@ -652,7 +653,21 @@ out:
        return lruvec;
 }
 
-struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
+/**
+ * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
+ * @page: the page
+ * @pgdat: pgdat of the page
+ *
+ * This function relies on page->mem_cgroup being stable.
+ */
+static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
+                                               struct pglist_data *pgdat)
+{
+       struct mem_cgroup *memcg = page_memcg(page);
+
+       VM_WARN_ON_ONCE_PAGE(!memcg, page);
+       return mem_cgroup_lruvec(memcg, pgdat);
+}
 
 static inline bool lruvec_holds_page_lru_lock(struct page *page,
                                              struct lruvec *lruvec)
@@ -913,41 +928,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
        local_irq_restore(flags);
 }
 
-/**
- * mod_memcg_page_state - update page state statistics
- * @page: the page
- * @idx: page state item to account
- * @val: number of pages (positive or negative)
- *
- * The @page must be locked or the caller must use lock_page_memcg()
- * to prevent double accounting when the page is concurrently being
- * moved to another memcg:
- *
- *   lock_page(page) or lock_page_memcg(page)
- *   if (TestClearPageState(page))
- *     mod_memcg_page_state(page, state, -1);
- *   unlock_page(page) or unlock_page_memcg(page)
- *
- * Kernel pages are an exception to this, since they'll never move.
- */
-static inline void __mod_memcg_page_state(struct page *page,
-                                         int idx, int val)
-{
-       struct mem_cgroup *memcg = page_memcg(page);
-
-       if (memcg)
-               __mod_memcg_state(memcg, idx, val);
-}
-
-static inline void mod_memcg_page_state(struct page *page,
-                                       int idx, int val)
-{
-       struct mem_cgroup *memcg = page_memcg(page);
-
-       if (memcg)
-               mod_memcg_state(memcg, idx, val);
-}
-
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                              enum node_stat_item idx)
 {
@@ -1395,18 +1375,6 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg,
 {
 }
 
-static inline void __mod_memcg_page_state(struct page *page,
-                                         int idx,
-                                         int nr)
-{
-}
-
-static inline void mod_memcg_page_state(struct page *page,
-                                       int idx,
-                                       int nr)
-{
-}
-
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                              enum node_stat_item idx)
 {
@@ -1479,34 +1447,6 @@ static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
 }
 #endif /* CONFIG_MEMCG */
 
-/* idx can be of type enum memcg_stat_item or node_stat_item */
-static inline void __inc_memcg_state(struct mem_cgroup *memcg,
-                                    int idx)
-{
-       __mod_memcg_state(memcg, idx, 1);
-}
-
-/* idx can be of type enum memcg_stat_item or node_stat_item */
-static inline void __dec_memcg_state(struct mem_cgroup *memcg,
-                                    int idx)
-{
-       __mod_memcg_state(memcg, idx, -1);
-}
-
-/* idx can be of type enum memcg_stat_item or node_stat_item */
-static inline void __inc_memcg_page_state(struct page *page,
-                                         int idx)
-{
-       __mod_memcg_page_state(page, idx, 1);
-}
-
-/* idx can be of type enum memcg_stat_item or node_stat_item */
-static inline void __dec_memcg_page_state(struct page *page,
-                                         int idx)
-{
-       __mod_memcg_page_state(page, idx, -1);
-}
-
 static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
 {
        __mod_lruvec_kmem_state(p, idx, 1);
@@ -1517,34 +1457,6 @@ static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx)
        __mod_lruvec_kmem_state(p, idx, -1);
 }
 
-/* idx can be of type enum memcg_stat_item or node_stat_item */
-static inline void inc_memcg_state(struct mem_cgroup *memcg,
-                                  int idx)
-{
-       mod_memcg_state(memcg, idx, 1);
-}
-
-/* idx can be of type enum memcg_stat_item or node_stat_item */
-static inline void dec_memcg_state(struct mem_cgroup *memcg,
-                                  int idx)
-{
-       mod_memcg_state(memcg, idx, -1);
-}
-
-/* idx can be of type enum memcg_stat_item or node_stat_item */
-static inline void inc_memcg_page_state(struct page *page,
-                                       int idx)
-{
-       mod_memcg_page_state(page, idx, 1);
-}
-
-/* idx can be of type enum memcg_stat_item or node_stat_item */
-static inline void dec_memcg_page_state(struct page *page,
-                                       int idx)
-{
-       mod_memcg_page_state(page, idx, -1);
-}
-
 static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
 {
        struct mem_cgroup *memcg;
@@ -1733,21 +1645,6 @@ static inline void memcg_kmem_uncharge_page(struct page *page, int order)
                __memcg_kmem_uncharge_page(page, order);
 }
 
-static inline int memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
-                                   unsigned int nr_pages)
-{
-       if (memcg_kmem_enabled())
-               return __memcg_kmem_charge(memcg, gfp, nr_pages);
-       return 0;
-}
-
-static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg,
-                                      unsigned int nr_pages)
-{
-       if (memcg_kmem_enabled())
-               __memcg_kmem_uncharge(memcg, nr_pages);
-}
-
 /*
  * A helper for accessing memcg's kmem_id, used for getting
  * corresponding LRU lists.
index 8551610..5299b90 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/sizes.h>
 #include <linux/sched.h>
 #include <linux/pgtable.h>
+#include <linux/kasan.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1421,23 +1422,31 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
-#ifdef CONFIG_KASAN_SW_TAGS
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
+
 static inline u8 page_kasan_tag(const struct page *page)
 {
-       return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK;
+       if (kasan_enabled())
+               return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK;
+       return 0xff;
 }
 
 static inline void page_kasan_tag_set(struct page *page, u8 tag)
 {
-       page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
-       page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
+       if (kasan_enabled()) {
+               page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
+               page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
+       }
 }
 
 static inline void page_kasan_tag_reset(struct page *page)
 {
-       page_kasan_tag_set(page, 0xff);
+       if (kasan_enabled())
+               page_kasan_tag_set(page, 0xff);
 }
-#else
+
+#else /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
+
 static inline u8 page_kasan_tag(const struct page *page)
 {
        return 0xff;
@@ -1445,7 +1454,8 @@ static inline u8 page_kasan_tag(const struct page *page)
 
 static inline void page_kasan_tag_set(struct page *page, u8 tag) { }
 static inline void page_kasan_tag_reset(struct page *page) { }
-#endif
+
+#endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
 
 static inline struct zone *page_zone(const struct page *page)
 {
@@ -2702,6 +2712,8 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma)
 }
 #endif
 
+void vma_set_file(struct vm_area_struct *vma, struct file *file);
+
 #ifdef CONFIG_NUMA_BALANCING
 unsigned long change_prot_numa(struct vm_area_struct *vma,
                        unsigned long start, unsigned long end);
index 2ad72d2..5d0767c 100644 (file)
@@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm);
                        BUG();                                          \
                }                                                       \
        } while (0)
+#define VM_WARN_ON_ONCE_PAGE(cond, page)       ({                      \
+       static bool __section(".data.once") __warned;                   \
+       int __ret_warn_once = !!(cond);                                 \
+                                                                       \
+       if (unlikely(__ret_warn_once && !__warned)) {                   \
+               dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\
+               __warned = true;                                        \
+               WARN_ON(1);                                             \
+       }                                                               \
+       unlikely(__ret_warn_once);                                      \
+})
+
 #define VM_WARN_ON(cond) (void)WARN_ON(cond)
 #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
 #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm);
 #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
 #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_PAGE(cond, page)  BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
 #endif
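
The memcontrol.h hunk earlier in this diff already shows the intended use, warning once per call site and dumping the offending page:

    VM_WARN_ON_ONCE_PAGE(!memcg, page);  /* from mem_cgroup_page_lruvec() */

Without CONFIG_DEBUG_VM it degrades to BUILD_BUG_ON_INVALID(), so the condition stays type-checked but is never evaluated at run time.
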
index 4fa67a8..9e09d11 100644 (file)
@@ -96,7 +96,8 @@ void module_arch_cleanup(struct module *mod);
 /* Any cleanup before freeing mod->module_init */
 void module_arch_freeing_init(struct module *mod);
 
-#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC)
+#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
+               !defined(CONFIG_KASAN_VMALLOC)
 #include <linux/kasan.h>
 #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
 #else
index e200eef..7d4ec26 100644 (file)
@@ -77,7 +77,7 @@
 #define LAST_CPUPID_SHIFT 0
 #endif
 
-#ifdef CONFIG_KASAN_SW_TAGS
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
 #define KASAN_TAG_WIDTH 8
 #else
 #define KASAN_TAG_WIDTH 0
index a3a9a87..8637677 100644 (file)
@@ -1284,6 +1284,8 @@ enum ec_feature_code {
        EC_FEATURE_SCP = 39,
        /* The MCU is an Integrated Sensor Hub */
        EC_FEATURE_ISH = 40,
+       /* New TCPMv2 TYPEC_ prefaced commands supported */
+       EC_FEATURE_TYPEC_CMD = 41,
 };
 
 #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32)
@@ -5528,6 +5530,159 @@ struct ec_response_regulator_get_voltage {
        uint32_t voltage_mv;
 } __ec_align4;
 
+/*
+ * Gather all discovery information for the given port and partner type.
+ *
+ * Note that if discovery has not yet completed, only the currently completed
+ * responses will be filled in. If the discovery data structures are changed
+ * while the command is running, BUSY will be returned.
+ *
+ * VDO field sizes are set to the maximum possible number of VDOs a VDM may
+ * contain, while the number of SVIDs here is selected to fit within the PROTO2
+ * maximum parameter size.
+ */
+#define EC_CMD_TYPEC_DISCOVERY 0x0131
+
+enum typec_partner_type {
+       TYPEC_PARTNER_SOP = 0,
+       TYPEC_PARTNER_SOP_PRIME = 1,
+};
+
+struct ec_params_typec_discovery {
+       uint8_t port;
+       uint8_t partner_type; /* enum typec_partner_type */
+} __ec_align1;
+
+struct svid_mode_info {
+       uint16_t svid;
+       uint16_t mode_count;  /* Number of modes partner sent */
+       uint32_t mode_vdo[6]; /* Max VDOs allowed after VDM header is 6 */
+};
+
+struct ec_response_typec_discovery {
+       uint8_t identity_count;    /* Number of identity VDOs partner sent */
+       uint8_t svid_count;        /* Number of SVIDs partner sent */
+       uint16_t reserved;
+       uint32_t discovery_vdo[6]; /* Max VDOs allowed after VDM header is 6 */
+       struct svid_mode_info svids[0];
+} __ec_align1;
+
+/*
+ * Gather all status information for a port.
+ *
+ * Note: this covers many of the return fields from the deprecated
+ * EC_CMD_USB_PD_CONTROL command, except those that are redundant with the
+ * discovery data.  The "enum pd_cc_states" is defined with the deprecated
+ * EC_CMD_USB_PD_CONTROL command.
+ *
+ * This also combines in the EC_CMD_USB_PD_MUX_INFO flags.
+ */
+#define EC_CMD_TYPEC_STATUS 0x0133
+
+/*
+ * Power role.
+ *
+ * Note this is also used for PD header creation, and values align to those in
+ * the Power Delivery Specification Revision 3.0 (See
+ * 6.2.1.1.4 Port Power Role).
+ */
+enum pd_power_role {
+       PD_ROLE_SINK = 0,
+       PD_ROLE_SOURCE = 1
+};
+
+/*
+ * Data role.
+ *
+ * Note this is also used for PD header creation, and the first two values
+ * align to those in the Power Delivery Specification Revision 3.0 (See
+ * 6.2.1.1.6 Port Data Role).
+ */
+enum pd_data_role {
+       PD_ROLE_UFP = 0,
+       PD_ROLE_DFP = 1,
+       PD_ROLE_DISCONNECTED = 2,
+};
+
+enum pd_vconn_role {
+       PD_ROLE_VCONN_OFF = 0,
+       PD_ROLE_VCONN_SRC = 1,
+};
+
+/*
+ * Note: BIT(0) may be used to determine whether the polarity is CC1 or CC2,
+ * regardless of whether a debug accessory is connected.
+ */
+enum tcpc_cc_polarity {
+       /*
+        * _CCx: is used to indicate the polarity while not connected to
+        * a Debug Accessory.  Only one CC line will assert a resistor and
+        * the other will be open.
+        */
+       POLARITY_CC1 = 0,
+       POLARITY_CC2 = 1,
+
+       /*
+        * _CCx_DTS is used to indicate the polarity while connected to a
+        * SRC Debug Accessory.  Assert resistors on both lines.
+        */
+       POLARITY_CC1_DTS = 2,
+       POLARITY_CC2_DTS = 3,
+
+       /*
+        * The current TCPC code relies on these specific POLARITY values.
+        * POLARITY_COUNT exists so that a check can catch this list growing
+        * for any reason, giving a hint that other places may need to be
+        * adjusted.
+        */
+       POLARITY_COUNT
+};
+
+#define PD_STATUS_EVENT_SOP_DISC_DONE          BIT(0)
+#define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE    BIT(1)
+
+struct ec_params_typec_status {
+       uint8_t port;
+} __ec_align1;
+
+struct ec_response_typec_status {
+       uint8_t pd_enabled;             /* PD communication enabled - bool */
+       uint8_t dev_connected;          /* Device connected - bool */
+       uint8_t sop_connected;          /* Device is SOP PD capable - bool */
+       uint8_t source_cap_count;       /* Number of Source Cap PDOs */
+
+       uint8_t power_role;             /* enum pd_power_role */
+       uint8_t data_role;              /* enum pd_data_role */
+       uint8_t vconn_role;             /* enum pd_vconn_role */
+       uint8_t sink_cap_count;         /* Number of Sink Cap PDOs */
+
+       uint8_t polarity;               /* enum tcpc_cc_polarity */
+       uint8_t cc_state;               /* enum pd_cc_states */
+       uint8_t dp_pin;                 /* DP pin mode (MODE_DP_IN_[A-E]) */
+       uint8_t mux_state;              /* USB_PD_MUX* - encoded mux state */
+
+       char tc_state[32];              /* TC state name */
+
+       uint32_t events;                /* PD_STATUS_EVENT bitmask */
+
+       /*
+        * BCD PD revisions for partners
+        *
+        * The format has the PD major revision in the upper nibble, and the
+        * PD minor version in the next nibble. The following two nibbles are
+        * currently 0.
+        * ex. PD 3.2 would map to 0x3200
+        *
+        * PD major/minor will be 0 if no PD device is connected.
+        */
+       uint16_t sop_revision;
+       uint16_t sop_prime_revision;
+
+       uint32_t source_cap_pdos[7];    /* Max 7 PDOs can be present */
+
+       uint32_t sink_cap_pdos[7];      /* Max 7 PDOs can be present */
+} __ec_align1;
+
 /*****************************************************************************/
 /* The command range 0x200-0x2FF is reserved for Rotor. */
 
index 40f9c76..c68cbf3 100644 (file)
  * @battery_info:         recommended structure to specify static power supply
  *                        parameters
  * @cal_charge:           calculate charge level.
- * @gpio_charge_finished: gpio for the charger.
- * @gpio_inverted:        Should be 1 if the GPIO is active low otherwise 0
  * @jitter_delay:         delay required after the interrupt to check battery
 *                       status. The default is 10 ms.
  */
 struct gab_platform_data {
        struct power_supply_info battery_info;
        int     (*cal_charge)(long value);
-       int     gpio_charge_finished;
-       bool    gpio_inverted;
        int     jitter_delay;
 };
 
index 2a1bfb8..4ca0060 100644 (file)
@@ -34,6 +34,15 @@ struct psci_operations {
 
 extern struct psci_operations psci_ops;
 
+struct psci_0_1_function_ids {
+       u32 cpu_suspend;
+       u32 cpu_on;
+       u32 cpu_off;
+       u32 migrate;
+};
+
+struct psci_0_1_function_ids get_psci_0_1_function_ids(void);
+
 #if defined(CONFIG_ARM_PSCI_FW)
 int __init psci_dt_init(void);
 #else
index a13ff38..e4d84d4 100644 (file)
@@ -473,11 +473,6 @@ static inline int pwmchip_add(struct pwm_chip *chip)
        return -EINVAL;
 }
 
-static inline int pwmchip_add_inversed(struct pwm_chip *chip)
-{
-       return -EINVAL;
-}
-
 static inline int pwmchip_remove(struct pwm_chip *chip)
 {
        return -EINVAL;
index b829382..5689094 100644 (file)
@@ -141,12 +141,6 @@ struct rtc_device {
         */
        unsigned long set_offset_nsec;
 
-       bool registered;
-
-       /* Old ABI support */
-       bool nvram_old_abi;
-       struct bin_attribute *nvram;
-
        time64_t range_min;
        timeu64_t range_max;
        time64_t start_secs;
@@ -184,7 +178,7 @@ extern struct rtc_device *devm_rtc_device_register(struct device *dev,
                                        const struct rtc_class_ops *ops,
                                        struct module *owner);
 struct rtc_device *devm_rtc_allocate_device(struct device *dev);
-int __rtc_register_device(struct module *owner, struct rtc_device *rtc);
+int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc);
 
 extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
 extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
@@ -227,8 +221,8 @@ static inline bool is_leap_year(unsigned int year)
        return (!(year % 4) && (year % 100)) || !(year % 400);
 }
 
-#define rtc_register_device(device) \
-       __rtc_register_device(THIS_MODULE, device)
+#define devm_rtc_register_device(device) \
+       __devm_rtc_register_device(THIS_MODULE, device)
 
 #ifdef CONFIG_RTC_HCTOSYS_DEVICE
 extern int rtc_hctosys_ret;
@@ -237,16 +231,14 @@ extern int rtc_hctosys_ret;
 #endif
 
 #ifdef CONFIG_RTC_NVMEM
-int rtc_nvmem_register(struct rtc_device *rtc,
-                      struct nvmem_config *nvmem_config);
-void rtc_nvmem_unregister(struct rtc_device *rtc);
+int devm_rtc_nvmem_register(struct rtc_device *rtc,
+                           struct nvmem_config *nvmem_config);
 #else
-static inline int rtc_nvmem_register(struct rtc_device *rtc,
-                                    struct nvmem_config *nvmem_config)
+static inline int devm_rtc_nvmem_register(struct rtc_device *rtc,
+                                         struct nvmem_config *nvmem_config)
 {
        return 0;
 }
-static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
 #endif
 
 #ifdef CONFIG_RTC_INTF_SYSFS
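
With registration now devres-only, a driver's probe collapses to allocate, configure, register, and the remove path loses its teardown calls entirely (the foo_* names are illustrative):

    static int foo_rtc_probe(struct platform_device *pdev)
    {
            struct rtc_device *rtc;
            int ret;

            rtc = devm_rtc_allocate_device(&pdev->dev);
            if (IS_ERR(rtc))
                    return PTR_ERR(rtc);

            rtc->ops = &foo_rtc_ops;
            rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
            rtc->range_max = RTC_TIMESTAMP_END_2099;

            ret = devm_rtc_nvmem_register(rtc, &foo_nvmem_cfg);
            if (ret)
                    return ret;

            return devm_rtc_register_device(rtc);
    }
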
index 833871d..57f982c 100644 (file)
@@ -14,9 +14,6 @@ struct s3c_adc_bat_pdata {
        void (*enable_charger)(void);
        void (*disable_charger)(void);
 
-       int gpio_charge_finished;
-       int gpio_inverted;
-
        const struct s3c_adc_bat_thresh *lut_noac;
        unsigned int lut_noac_cnt;
        const struct s3c_adc_bat_thresh *lut_acin;
index 51d535b..6e3a5ee 100644 (file)
@@ -1234,7 +1234,7 @@ struct task_struct {
        u64                             timer_slack_ns;
        u64                             default_timer_slack_ns;
 
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
        unsigned int                    kasan_depth;
 #endif
 
index 3ed5aa1..6205578 100644 (file)
@@ -28,6 +28,11 @@ static inline unsigned long map_util_freq(unsigned long util,
 {
        return (freq + (freq >> 2)) * util / cap;
 }
+
+static inline unsigned long map_util_perf(unsigned long util)
+{
+       return util + (util >> 2);
+}
 #endif /* CONFIG_CPU_FREQ */
 
 #endif /* _LINUX_SCHED_CPUFREQ_H */
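
Like map_util_freq(), the new helper applies the standard 25% headroom (util + util/4), but stays in the utilization domain for drivers that accept performance levels directly: map_util_perf(800) yields 800 + 200 = 1000. Illustrative use:

    /* Illustrative only: 25% headroom in the utilization domain. */
    unsigned long target_perf = map_util_perf(800);     /* -> 1000 */
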
index 1cd63a8..4fcfb56 100644 (file)
@@ -267,7 +267,7 @@ void __write_overflow(void) __compiletime_error("detected write beyond size of o
 
 #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
 
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr);
 extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp);
 extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy);
index df0c3c7..f3929af 100644 (file)
@@ -362,6 +362,11 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
                                int maxevents, int timeout,
                                const sigset_t __user *sigmask,
                                size_t sigsetsize);
+asmlinkage long sys_epoll_pwait2(int epfd, struct epoll_event __user *events,
+                                int maxevents,
+                                const struct __kernel_timespec __user *timeout,
+                                const sigset_t __user *sigmask,
+                                size_t sigsetsize);
 
 /* fs/fcntl.c */
 asmlinkage long sys_dup(unsigned int fildes);
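
The new variant takes a timespec instead of integer milliseconds. From user space it can be exercised through syscall(2) until glibc grows a wrapper; a sketch, assuming the syscall number assigned later in this section:

    /* Hedged sketch: calling epoll_pwait2() via raw syscall. Assumes
     * the __NR_epoll_pwait2 number added later in this series; on
     * 64-bit, struct timespec matches the kernel's __kernel_timespec. */
    #include <signal.h>
    #include <sys/epoll.h>
    #include <sys/syscall.h>
    #include <time.h>
    #include <unistd.h>

    static int my_epoll_pwait2(int epfd, struct epoll_event *events,
                               int maxevents, const struct timespec *timeout,
                               const sigset_t *sigmask)
    {
            /* The kernel expects its own sigset size, _NSIG / 8 == 8. */
            return syscall(__NR_epoll_pwait2, epfd, events, maxevents,
                           timeout, sigmask, 8);
    }

    /* e.g. a 1.5 ms timeout, finer than epoll_pwait()'s milliseconds:
     *   struct timespec ts = { .tv_sec = 0, .tv_nsec = 1500000 };    */
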
index 30bc7a7..0fefeb9 100644 (file)
@@ -42,6 +42,7 @@ struct vdpa_vq_state {
  * @config: the configuration ops for this device.
  * @index: device index
  * @features_valid: were features initialized? for legacy guests
+ * @nvqs: maximum number of supported virtqueues
  */
 struct vdpa_device {
        struct device dev;
index 27fb99c..fe10e85 100644 (file)
@@ -22,6 +22,7 @@ int default_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int
 #define WQ_FLAG_BOOKMARK       0x04
 #define WQ_FLAG_CUSTOM         0x08
 #define WQ_FLAG_DONE           0x10
+#define WQ_FLAG_PRIORITY       0x20
 
 /*
  * A single wait-queue entry structure:
@@ -164,11 +165,20 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
 
 extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 
 static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
-       list_add(&wq_entry->entry, &wq_head->head);
+       struct list_head *head = &wq_head->head;
+       struct wait_queue_entry *wq;
+
+       list_for_each_entry(wq, &wq_head->head, entry) {
+               if (!(wq->flags & WQ_FLAG_PRIORITY))
+                       break;
+               head = &wq->entry;
+       }
+       list_add(&wq_entry->entry, head);
 }
 
 /*
index dd5b5bd..e1c308d 100644 (file)
@@ -140,10 +140,16 @@ struct p9_client {
  *
  * TODO: This needs lots of explanation.
  */
+enum fid_source {
+       FID_FROM_OTHER,
+       FID_FROM_INODE,
+       FID_FROM_DENTRY,
+};
 
 struct p9_fid {
        struct p9_client *clnt;
        u32 fid;
+       refcount_t count;
        int mode;
        struct p9_qid qid;
        u32 iounit;
@@ -152,6 +158,7 @@ struct p9_fid {
        void *rdir;
 
        struct hlist_node dlist;        /* list of all fids attached to a dentry */
+       struct hlist_node ilist;
 };
 
 /**
index cb1aea2..e19edc6 100644 (file)
@@ -118,6 +118,50 @@ DEFINE_EVENT(clk_rate, clk_set_rate_complete,
        TP_ARGS(core, rate)
 );
 
+DEFINE_EVENT(clk_rate, clk_set_min_rate,
+
+       TP_PROTO(struct clk_core *core, unsigned long rate),
+
+       TP_ARGS(core, rate)
+);
+
+DEFINE_EVENT(clk_rate, clk_set_max_rate,
+
+       TP_PROTO(struct clk_core *core, unsigned long rate),
+
+       TP_ARGS(core, rate)
+);
+
+DECLARE_EVENT_CLASS(clk_rate_range,
+
+       TP_PROTO(struct clk_core *core, unsigned long min, unsigned long max),
+
+       TP_ARGS(core, min, max),
+
+       TP_STRUCT__entry(
+               __string(        name,           core->name                )
+               __field(unsigned long,           min                       )
+               __field(unsigned long,           max                       )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, core->name);
+               __entry->min = min;
+               __entry->max = max;
+       ),
+
+       TP_printk("%s min %lu max %lu", __get_str(name),
+                 (unsigned long)__entry->min,
+                 (unsigned long)__entry->max)
+);
+
+DEFINE_EVENT(clk_rate_range, clk_set_rate_range,
+
+       TP_PROTO(struct clk_core *core, unsigned long min, unsigned long max),
+
+       TP_ARGS(core, min, max)
+);
+
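+/*
+ * Each DEFINE_EVENT above generates a trace_<name>() helper, so an
+ * emitting call site in the clk core reduces to one line, e.g.
+ * (illustrative only; core is a struct clk_core *):
+ *
+ *     trace_clk_set_rate_range(core, min, max);
+ */
+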
 DECLARE_EVENT_CLASS(clk_parent,
 
        TP_PROTO(struct clk_core *core, struct clk_core *parent),
index 26cfb0f..49d7d0f 100644 (file)
@@ -399,6 +399,69 @@ TRACE_EVENT(kvm_halt_poll_ns,
 #define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \
        trace_kvm_halt_poll_ns(false, vcpu_id, new, old)
 
+TRACE_EVENT(kvm_dirty_ring_push,
+       TP_PROTO(struct kvm_dirty_ring *ring, u32 slot, u64 offset),
+       TP_ARGS(ring, slot, offset),
+
+       TP_STRUCT__entry(
+               __field(int, index)
+               __field(u32, dirty_index)
+               __field(u32, reset_index)
+               __field(u32, slot)
+               __field(u64, offset)
+       ),
+
+       TP_fast_assign(
+               __entry->index          = ring->index;
+               __entry->dirty_index    = ring->dirty_index;
+               __entry->reset_index    = ring->reset_index;
+               __entry->slot           = slot;
+               __entry->offset         = offset;
+       ),
+
+       TP_printk("ring %d: dirty 0x%x reset 0x%x "
+                 "slot %u offset 0x%llx (used %u)",
+                 __entry->index, __entry->dirty_index,
+                 __entry->reset_index,  __entry->slot, __entry->offset,
+                 __entry->dirty_index - __entry->reset_index)
+);
+
+TRACE_EVENT(kvm_dirty_ring_reset,
+       TP_PROTO(struct kvm_dirty_ring *ring),
+       TP_ARGS(ring),
+
+       TP_STRUCT__entry(
+               __field(int, index)
+               __field(u32, dirty_index)
+               __field(u32, reset_index)
+       ),
+
+       TP_fast_assign(
+               __entry->index          = ring->index;
+               __entry->dirty_index    = ring->dirty_index;
+               __entry->reset_index    = ring->reset_index;
+       ),
+
+       TP_printk("ring %d: dirty 0x%x reset 0x%x (used %u)",
+                 __entry->index, __entry->dirty_index, __entry->reset_index,
+                 __entry->dirty_index - __entry->reset_index)
+);
+
+TRACE_EVENT(kvm_dirty_ring_exit,
+       TP_PROTO(struct kvm_vcpu *vcpu),
+       TP_ARGS(vcpu),
+
+       TP_STRUCT__entry(
+               __field(int, vcpu_id)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id = vcpu->vcpu_id;
+       ),
+
+       TP_printk("vcpu %d", __entry->vcpu_id)
+);
+
 #endif /* _TRACE_KVM_MAIN_H */
 
 /* This part must be outside protection */
index fc48c64..7287529 100644 (file)
@@ -859,9 +859,11 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
 #define __NR_process_madvise 440
 __SYSCALL(__NR_process_madvise, sys_process_madvise)
+#define __NR_epoll_pwait2 441
+__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 
 #undef __NR_syscalls
-#define __NR_syscalls 441
+#define __NR_syscalls 442
 
 /*
  * 32 bit systems traditionally used different
index 5ad10ab..b49fbf2 100644 (file)
@@ -218,6 +218,27 @@ extern "C" {
 #define DRM_MODE_CONTENT_PROTECTION_DESIRED     1
 #define DRM_MODE_CONTENT_PROTECTION_ENABLED     2
 
+/**
+ * struct drm_mode_modeinfo - Display mode information.
+ * @clock: pixel clock in kHz
+ * @hdisplay: horizontal display size
+ * @hsync_start: horizontal sync start
+ * @hsync_end: horizontal sync end
+ * @htotal: horizontal total size
+ * @hskew: horizontal skew
+ * @vdisplay: vertical display size
+ * @vsync_start: vertical sync start
+ * @vsync_end: vertical sync end
+ * @vtotal: vertical total size
+ * @vscan: vertical scan
+ * @vrefresh: approximate vertical refresh rate in Hz
+ * @flags: bitmask of misc. flags, see DRM_MODE_FLAG_* defines
+ * @type: bitmask of type flags, see DRM_MODE_TYPE_* defines
+ * @name: string describing the mode resolution
+ *
+ * This is the user-space API display mode information structure. For the
+ * kernel version see struct drm_display_mode.
+ */
 struct drm_mode_modeinfo {
        __u32 clock;
        __u16 hdisplay;
@@ -368,27 +389,95 @@ enum drm_mode_subconnector {
 #define DRM_MODE_CONNECTOR_WRITEBACK   18
 #define DRM_MODE_CONNECTOR_SPI         19
 
+/**
+ * struct drm_mode_get_connector - Get connector metadata.
+ *
+ * User-space can perform a GETCONNECTOR ioctl to retrieve information about a
+ * connector. User-space is expected to retrieve encoders, modes and properties
+ * by performing this ioctl at least twice: the first time to retrieve the
+ * number of elements, the second time to retrieve the elements themselves.
+ *
+ * To retrieve the number of elements, set @count_props and @count_encoders to
+ * zero, set @count_modes to 1, and set @modes_ptr to a temporary struct
+ * drm_mode_modeinfo element.
+ *
+ * To retrieve the elements, allocate arrays for @encoders_ptr, @modes_ptr,
+ * @props_ptr and @prop_values_ptr, then set @count_modes, @count_props and
+ * @count_encoders to their capacity.
+ *
+ * Performing the ioctl only twice may be racy: the number of elements may have
+ * changed with a hotplug event in-between the two ioctls. User-space is
+ * expected to retry the last ioctl until the number of elements stabilizes.
+ * The kernel won't fill any array which doesn't have the expected length.
+ *
+ * **Force-probing a connector**
+ *
+ * If the @count_modes field is set to zero, the kernel will perform a forced
+ * probe on the connector to refresh the connector status, modes and EDID.
+ * A force-probe can be slow and the ioctl will block. It can also cause
+ * flickering and temporary freezes, so it should not be performed
+ * automatically.
+ *
+ * User-space shouldn't need to force-probe connectors in general: the kernel
+ * will automatically take care of probing connectors that don't support
+ * hot-plug detection when appropriate. However, user-space may force-probe
+ * connectors on user request (e.g. clicking a "Scan connectors" button, or
+ * opening a UI to manage screens).
+ */
 struct drm_mode_get_connector {
-
+       /** @encoders_ptr: Pointer to ``__u32`` array of object IDs. */
        __u64 encoders_ptr;
+       /** @modes_ptr: Pointer to struct drm_mode_modeinfo array. */
        __u64 modes_ptr;
+       /** @props_ptr: Pointer to ``__u32`` array of property IDs. */
        __u64 props_ptr;
+       /** @prop_values_ptr: Pointer to ``__u64`` array of property values. */
        __u64 prop_values_ptr;
 
+       /** @count_modes: Number of modes. */
        __u32 count_modes;
+       /** @count_props: Number of properties. */
        __u32 count_props;
+       /** @count_encoders: Number of encoders. */
        __u32 count_encoders;
 
-       __u32 encoder_id; /**< Current Encoder */
-       __u32 connector_id; /**< Id */
+       /** @encoder_id: Object ID of the current encoder. */
+       __u32 encoder_id;
+       /** @connector_id: Object ID of the connector. */
+       __u32 connector_id;
+       /**
+        * @connector_type: Type of the connector.
+        *
+        * See DRM_MODE_CONNECTOR_* defines.
+        */
        __u32 connector_type;
+       /**
+        * @connector_type_id: Type-specific connector number.
+        *
+        * This is not an object ID. This is a per-type connector number. Each
+        * (type, type_id) combination is unique across all connectors of a DRM
+        * device.
+        */
        __u32 connector_type_id;
 
+       /**
+        * @connection: Status of the connector.
+        *
+        * See enum drm_connector_status.
+        */
        __u32 connection;
-       __u32 mm_width;  /**< width in millimeters */
-       __u32 mm_height; /**< height in millimeters */
+       /** @mm_width: Width of the connected sink in millimeters. */
+       __u32 mm_width;
+       /** @mm_height: Height of the connected sink in millimeters. */
+       __u32 mm_height;
+       /**
+        * @subpixel: Subpixel order of the connected sink.
+        *
+        * See enum subpixel_order.
+        */
        __u32 subpixel;
 
+       /** @pad: Padding, must be zero. */
        __u32 pad;
 };
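
A trimmed sketch of the two-pass protocol described above, fetching just the mode list (properties and encoders follow the same pattern; the hotplug retry loop and libdrm include-path details are assumed):

    /* Hedged sketch of the two-ioctl GETCONNECTOR dance. */
    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <drm/drm.h>

    static struct drm_mode_modeinfo *get_modes(int fd, uint32_t connector_id,
                                               uint32_t *count)
    {
            struct drm_mode_modeinfo tmp, *modes;
            struct drm_mode_get_connector conn = {
                    .connector_id = connector_id,
                    .count_modes  = 1,          /* non-zero: avoid a forced probe */
                    .modes_ptr    = (uintptr_t)&tmp,
            };

            /* Pass 1: learn the element counts. */
            if (ioctl(fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn))
                    return NULL;

            /* Pass 2: fetch the modes themselves. */
            modes = calloc(conn.count_modes, sizeof(*modes));
            conn.modes_ptr = (uintptr_t)modes;
            if (ioctl(fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn)) {
                    free(modes);
                    return NULL;
            }
            *count = conn.count_modes;
            return modes;
    }
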
 
@@ -905,24 +994,23 @@ struct drm_format_modifier {
 
 /**
  * struct drm_mode_create_blob - Create a new blob property
- * @data: Pointer to data to copy.
- * @length: Length of data to copy.
- * @blob_id: new property ID.
+ *
  * Create a new 'blob' data property, copying length bytes from data pointer,
  * and returning new blob ID.
  */
 struct drm_mode_create_blob {
-       /** Pointer to data to copy. */
+       /** @data: Pointer to data to copy. */
        __u64 data;
-       /** Length of data to copy. */
+       /** @length: Length of data to copy. */
        __u32 length;
-       /** Return: new property ID. */
+       /** @blob_id: Return: new property ID. */
        __u32 blob_id;
 };
 
 /**
  * struct drm_mode_destroy_blob - Destroy user blob
  * @blob_id: blob_id to destroy
+ *
  * Destroy a user-created blob property.
  *
  * User-space can release blobs as soon as they do not need to refer to them by
@@ -937,36 +1025,32 @@ struct drm_mode_destroy_blob {
 
 /**
  * struct drm_mode_create_lease - Create lease
- * @object_ids: Pointer to array of object ids.
- * @object_count: Number of object ids.
- * @flags: flags for new FD.
- * @lessee_id: unique identifier for lessee.
- * @fd: file descriptor to new drm_master file.
+ *
  * Lease mode resources, creating another drm_master.
  */
 struct drm_mode_create_lease {
-       /** Pointer to array of object ids (__u32) */
+       /** @object_ids: Pointer to array of object ids (__u32) */
        __u64 object_ids;
-       /** Number of object ids */
+       /** @object_count: Number of object ids */
        __u32 object_count;
-       /** flags for new FD (O_CLOEXEC, etc) */
+       /** @flags: flags for new FD (O_CLOEXEC, etc) */
        __u32 flags;
 
-       /** Return: unique identifier for lessee. */
+       /** @lessee_id: Return: unique identifier for lessee. */
        __u32 lessee_id;
-       /** Return: file descriptor to new drm_master file */
+       /** @fd: Return: file descriptor to new drm_master file */
        __u32 fd;
 };
 
 /**
  * struct drm_mode_list_lessees - List lessees
- * @count_lessees: Number of lessees.
- * @pad: pad.
- * @lessees_ptr: Pointer to lessess.
- * List lesses from a drm_master
+ *
+ * List lessees from a drm_master.
  */
 struct drm_mode_list_lessees {
-       /** Number of lessees.
+       /**
+        * @count_lessees: Number of lessees.
+        *
         * On input, provides length of the array.
         * On output, provides total number. No
         * more than the input number will be written
@@ -974,23 +1058,26 @@ struct drm_mode_list_lessees {
         * the size and then the data.
         */
        __u32 count_lessees;
+       /** @pad: Padding. */
        __u32 pad;
 
-       /** Pointer to lessees.
-        * pointer to __u64 array of lessee ids
+       /**
+        * @lessees_ptr: Pointer to lessees.
+        *
+        * Pointer to __u64 array of lessee ids
         */
        __u64 lessees_ptr;
 };
 
 /**
  * struct drm_mode_get_lease - Get Lease
- * @count_objects: Number of leased objects.
- * @pad: pad.
- * @objects_ptr: Pointer to objects.
- * Get leased objects
+ *
+ * Get leased objects.
  */
 struct drm_mode_get_lease {
-       /** Number of leased objects.
+       /**
+        * @count_objects: Number of leased objects.
+        *
         * On input, provides length of the array.
         * On output, provides total number. No
         * more than the input number will be written
@@ -998,22 +1085,22 @@ struct drm_mode_get_lease {
         * the size and then the data.
         */
        __u32 count_objects;
+       /** @pad: Padding. */
        __u32 pad;
 
-       /** Pointer to objects.
-        * pointer to __u32 array of object ids
+       /**
+        * @objects_ptr: Pointer to objects.
+        *
+        * Pointer to __u32 array of object ids.
         */
        __u64 objects_ptr;
 };
 
 /**
  * struct drm_mode_revoke_lease - Revoke lease
- * @lessee_id: Unique ID of lessee.
- * Revoke lease
  */
 struct drm_mode_revoke_lease {
-       /** Unique ID of lessee
-        */
+       /** @lessee_id: Unique ID of lessee */
        __u32 lessee_id;
 };
 
diff --git a/include/uapi/linux/cifs/cifs_netlink.h b/include/uapi/linux/cifs/cifs_netlink.h
new file mode 100644 (file)
index 0000000..da31075
--- /dev/null
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: LGPL-2.1+ WITH Linux-syscall-note */
+/*
+ * Netlink routines for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+
+#ifndef _UAPILINUX_CIFS_NETLINK_H
+#define _UAPILINUX_CIFS_NETLINK_H
+
+#define CIFS_GENL_NAME                 "cifs"
+#define CIFS_GENL_VERSION              0x1
+
+#define CIFS_GENL_MCGRP_SWN_NAME       "cifs_mcgrp_swn"
+
+enum cifs_genl_multicast_groups {
+       CIFS_GENL_MCGRP_SWN,
+};
+
+enum cifs_genl_attributes {
+       CIFS_GENL_ATTR_UNSPEC,
+       CIFS_GENL_ATTR_SWN_REGISTRATION_ID,
+       CIFS_GENL_ATTR_SWN_NET_NAME,
+       CIFS_GENL_ATTR_SWN_SHARE_NAME,
+       CIFS_GENL_ATTR_SWN_IP,
+       CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY,
+       CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY,
+       CIFS_GENL_ATTR_SWN_IP_NOTIFY,
+       CIFS_GENL_ATTR_SWN_KRB_AUTH,
+       CIFS_GENL_ATTR_SWN_USER_NAME,
+       CIFS_GENL_ATTR_SWN_PASSWORD,
+       CIFS_GENL_ATTR_SWN_DOMAIN_NAME,
+       CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE,
+       CIFS_GENL_ATTR_SWN_RESOURCE_STATE,
+       CIFS_GENL_ATTR_SWN_RESOURCE_NAME,
+       __CIFS_GENL_ATTR_MAX,
+};
+#define CIFS_GENL_ATTR_MAX (__CIFS_GENL_ATTR_MAX - 1)
+
+enum cifs_genl_commands {
+       CIFS_GENL_CMD_UNSPEC,
+       CIFS_GENL_CMD_SWN_REGISTER,
+       CIFS_GENL_CMD_SWN_UNREGISTER,
+       CIFS_GENL_CMD_SWN_NOTIFY,
+       __CIFS_GENL_CMD_MAX
+};
+#define CIFS_GENL_CMD_MAX (__CIFS_GENL_CMD_MAX - 1)
+
+enum cifs_swn_notification_type {
+       CIFS_SWN_NOTIFICATION_RESOURCE_CHANGE = 0x01,
+       CIFS_SWN_NOTIFICATION_CLIENT_MOVE        = 0x02,
+       CIFS_SWN_NOTIFICATION_SHARE_MOVE         = 0x03,
+       CIFS_SWN_NOTIFICATION_IP_CHANGE  = 0x04,
+};
+
+enum cifs_swn_resource_state {
+       CIFS_SWN_RESOURCE_STATE_UNKNOWN     = 0x00,
+       CIFS_SWN_RESOURCE_STATE_AVAILABLE   = 0x01,
+       CIFS_SWN_RESOURCE_STATE_UNAVAILABLE = 0xFF
+};
+
+#endif /* _UAPILINUX_CIFS_NETLINK_H */
index 2072c26..e4eb0b8 100644 (file)
@@ -65,6 +65,7 @@ struct gpiochip_info {
  * @GPIO_V2_LINE_FLAG_BIAS_PULL_UP: line has pull-up bias enabled
  * @GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN: line has pull-down bias enabled
  * @GPIO_V2_LINE_FLAG_BIAS_DISABLED: line has bias disabled
+ * @GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME: line events contain REALTIME timestamps
  */
 enum gpio_v2_line_flag {
        GPIO_V2_LINE_FLAG_USED                  = _BITULL(0),
@@ -78,6 +79,7 @@ enum gpio_v2_line_flag {
        GPIO_V2_LINE_FLAG_BIAS_PULL_UP          = _BITULL(8),
        GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN        = _BITULL(9),
        GPIO_V2_LINE_FLAG_BIAS_DISABLED         = _BITULL(10),
+       GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME  = _BITULL(11),
 };
 
 /**
@@ -270,9 +272,6 @@ enum gpio_v2_line_event_id {
 /**
  * struct gpio_v2_line_event - The actual event being pushed to userspace
  * @timestamp_ns: best estimate of time of event occurrence, in nanoseconds.
- * The @timestamp_ns is read from %CLOCK_MONOTONIC and is intended to allow
- * the accurate measurement of the time between events. It does not provide
- * the wall-clock time.
  * @id: event identifier with value from &enum gpio_v2_line_event_id
  * @offset: the offset of the line that triggered the event
  * @seqno: the sequence number for this event in the sequence of events for
@@ -280,6 +279,13 @@ enum gpio_v2_line_event_id {
  * @line_seqno: the sequence number for this event in the sequence of
  * events on this particular line
  * @padding: reserved for future use
+ *
+ * By default the @timestamp_ns is read from %CLOCK_MONOTONIC and is
+ * intended to allow the accurate measurement of the time between events.
+ * It does not provide the wall-clock time.
+ *
+ * If the %GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME flag is set then the
+ * @timestamp_ns is read from %CLOCK_REALTIME.
  */
 struct gpio_v2_line_event {
        __aligned_u64 timestamp_ns;
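
From user space the new flag is set in the line-request config; a sketch against the v2 chardev ABI, assuming an already-open gpiochip fd and with error handling trimmed:

    /* Hedged sketch: opt a line into wall-clock event timestamps. */
    #include <linux/gpio.h>
    #include <string.h>
    #include <sys/ioctl.h>

    static int request_realtime_edge_events(int chip_fd, __u32 offset)
    {
            struct gpio_v2_line_request req;

            memset(&req, 0, sizeof(req));
            req.offsets[0] = offset;
            req.num_lines = 1;
            req.config.flags = GPIO_V2_LINE_FLAG_INPUT |
                               GPIO_V2_LINE_FLAG_EDGE_RISING |
                               GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME;

            if (ioctl(chip_fd, GPIO_V2_GET_LINE_IOCTL, &req) < 0)
                    return -1;
            return req.fd;      /* events now carry CLOCK_REALTIME stamps */
    }
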
index ca41220..886802b 100644 (file)
@@ -250,6 +250,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_ARM_NISV         28
 #define KVM_EXIT_X86_RDMSR        29
 #define KVM_EXIT_X86_WRMSR        30
+#define KVM_EXIT_DIRTY_RING_FULL  31
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -1053,6 +1054,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
+#define KVM_CAP_SYS_HYPERV_CPUID 191
+#define KVM_CAP_DIRTY_LOG_RING 192
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1511,7 +1514,7 @@ struct kvm_enc_region {
 /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */
 #define KVM_CLEAR_DIRTY_LOG          _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
 
-/* Available with KVM_CAP_HYPERV_CPUID */
+/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */
 #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2)
 
 /* Available with KVM_CAP_ARM_SVE */
@@ -1557,6 +1560,9 @@ struct kvm_pv_cmd {
 /* Available with KVM_CAP_X86_MSR_FILTER */
 #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO,  0xc6, struct kvm_msr_filter)
 
+/* Available with KVM_CAP_DIRTY_LOG_RING */
+#define KVM_RESET_DIRTY_RINGS          _IO(KVMIO, 0xc7)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
        /* Guest initialization commands */
@@ -1710,4 +1716,52 @@ struct kvm_hyperv_eventfd {
 #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE    (1 << 0)
 #define KVM_DIRTY_LOG_INITIALLY_SET            (1 << 1)
 
+/*
+ * Arch needs to define the macro after implementing the dirty ring
+ * feature.  KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the
+ * starting page offset of the dirty ring structures.
+ */
+#ifndef KVM_DIRTY_LOG_PAGE_OFFSET
+#define KVM_DIRTY_LOG_PAGE_OFFSET 0
+#endif
+
+/*
+ * KVM dirty GFN flags, defined as:
+ *
+ * |---------------+---------------+--------------|
+ * | bit 1 (reset) | bit 0 (dirty) | Status       |
+ * |---------------+---------------+--------------|
+ * |             0 |             0 | Invalid GFN  |
+ * |             0 |             1 | Dirty GFN    |
+ * |             1 |             X | GFN to reset |
+ * |---------------+---------------+--------------|
+ *
+ * Lifecycle of a dirty GFN goes like:
+ *
+ *      dirtied         harvested        reset
+ * 00 -----------> 01 -------------> 1X -------+
+ *  ^                                          |
+ *  |                                          |
+ *  +------------------------------------------+
+ *
+ * The userspace program is only responsible for the 01->1X state
+ * conversion after harvesting an entry.  Also, it must not skip any
+ * dirty bits, so that dirty bits are always harvested in sequence.
+ */
+#define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
+#define KVM_DIRTY_GFN_F_RESET           BIT(1)
+#define KVM_DIRTY_GFN_F_MASK            0x3
+
+/*
+ * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of
+ * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn.  The
+ * size of the gfn buffer is decided by the first argument when
+ * enabling KVM_CAP_DIRTY_LOG_RING.
+ */
+struct kvm_dirty_gfn {
+       __u32 flags;
+       __u32 slot;
+       __u64 offset;
+};
+
 #endif /* __LINUX_KVM_H */
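
Putting the lifecycle above into code, a VMM harvests entries in order and flags each one for reset; a sketch in which the dirty-tracking hook, ring sizing, and memory-ordering barriers are assumed:

    /* Hedged sketch of the 01 -> 1X harvest step described above.
     * `ring` points at the vcpu's mmap'ed kvm_dirty_gfn array, `size`
     * is the entry count negotiated when enabling KVM_CAP_DIRTY_LOG_RING,
     * and `fetch` is the VMM's own cursor. */
    #include <linux/kvm.h>

    extern void vmm_mark_dirty(__u32 slot, __u64 offset);  /* assumed hook */

    static void harvest_dirty_gfns(struct kvm_dirty_gfn *ring, __u32 size,
                                   __u32 *fetch)
    {
            for (;;) {
                    struct kvm_dirty_gfn *e = &ring[*fetch % size];

                    if (!(e->flags & KVM_DIRTY_GFN_F_DIRTY))
                            break;          /* harvest strictly in sequence */

                    vmm_mark_dirty(e->slot, e->offset);
                    e->flags |= KVM_DIRTY_GFN_F_RESET;      /* 01 -> 1X */
                    (*fetch)++;
            }
            /* Later, ioctl(vm_fd, KVM_RESET_DIRTY_RINGS) completes the
             * 1X -> 00 transition on the kernel side. */
    }
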
index 0ec6b61..97523a9 100644 (file)
@@ -115,6 +115,10 @@ enum virtio_gpu_ctrl_type {
 
 enum virtio_gpu_shm_id {
        VIRTIO_GPU_SHM_ID_UNDEFINED = 0,
+       /*
+        * VIRTIO_GPU_CMD_RESOURCE_MAP_BLOB
+        * VIRTIO_GPU_CMD_RESOURCE_UNMAP_BLOB
+        */
        VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1
 };
 
index b052355..bc1c062 100644 (file)
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE. */
 
-#define VIRTIO_ID_NET          1 /* virtio net */
-#define VIRTIO_ID_BLOCK                2 /* virtio block */
-#define VIRTIO_ID_CONSOLE      3 /* virtio console */
-#define VIRTIO_ID_RNG          4 /* virtio rng */
-#define VIRTIO_ID_BALLOON      5 /* virtio balloon */
-#define VIRTIO_ID_RPMSG                7 /* virtio remote processor messaging */
-#define VIRTIO_ID_SCSI         8 /* virtio scsi */
-#define VIRTIO_ID_9P           9 /* 9p virtio console */
-#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
-#define VIRTIO_ID_CAIF        12 /* Virtio caif */
-#define VIRTIO_ID_GPU          16 /* virtio GPU */
-#define VIRTIO_ID_INPUT        18 /* virtio input */
-#define VIRTIO_ID_VSOCK        19 /* virtio vsock transport */
-#define VIRTIO_ID_CRYPTO       20 /* virtio crypto */
-#define VIRTIO_ID_IOMMU        23 /* virtio IOMMU */
-#define VIRTIO_ID_MEM          24 /* virtio mem */
-#define VIRTIO_ID_FS           26 /* virtio filesystem */
-#define VIRTIO_ID_PMEM         27 /* virtio pmem */
-#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_NET                  1 /* virtio net */
+#define VIRTIO_ID_BLOCK                        2 /* virtio block */
+#define VIRTIO_ID_CONSOLE              3 /* virtio console */
+#define VIRTIO_ID_RNG                  4 /* virtio rng */
+#define VIRTIO_ID_BALLOON              5 /* virtio balloon */
+#define VIRTIO_ID_IOMEM                        6 /* virtio ioMemory */
+#define VIRTIO_ID_RPMSG                        7 /* virtio remote processor messaging */
+#define VIRTIO_ID_SCSI                 8 /* virtio scsi */
+#define VIRTIO_ID_9P                   9 /* 9p virtio console */
+#define VIRTIO_ID_MAC80211_WLAN                10 /* virtio WLAN MAC */
+#define VIRTIO_ID_RPROC_SERIAL         11 /* virtio remoteproc serial link */
+#define VIRTIO_ID_CAIF                 12 /* Virtio caif */
+#define VIRTIO_ID_MEMORY_BALLOON       13 /* virtio memory balloon */
+#define VIRTIO_ID_GPU                  16 /* virtio GPU */
+#define VIRTIO_ID_CLOCK                        17 /* virtio clock/timer */
+#define VIRTIO_ID_INPUT                        18 /* virtio input */
+#define VIRTIO_ID_VSOCK                        19 /* virtio vsock transport */
+#define VIRTIO_ID_CRYPTO               20 /* virtio crypto */
+#define VIRTIO_ID_SIGNAL_DIST          21 /* virtio signal distribution device */
+#define VIRTIO_ID_PSTORE               22 /* virtio pstore device */
+#define VIRTIO_ID_IOMMU                        23 /* virtio IOMMU */
+#define VIRTIO_ID_MEM                  24 /* virtio mem */
+#define VIRTIO_ID_FS                   26 /* virtio filesystem */
+#define VIRTIO_ID_PMEM                 27 /* virtio pmem */
+#define VIRTIO_ID_MAC80211_HWSIM       29 /* virtio mac80211-hwsim */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
index 15f6eb9..8a992d7 100644 (file)
@@ -176,7 +176,7 @@ struct task_struct init_task
        .numa_group     = NULL,
        .numa_faults    = NULL,
 #endif
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
        .kasan_depth    = 1,
 #endif
 #ifdef CONFIG_KCSAN
index fd2db26..479fc14 100644 (file)
@@ -20,6 +20,10 @@ config DMA_OPS
 config DMA_OPS_BYPASS
        bool
 
+# Lets the platform IOMMU driver choose between bypass and IOMMU
+config ARCH_HAS_DMA_MAP_DIRECT
+       bool
+
 config NEED_SG_DMA_LENGTH
        bool
 
@@ -220,3 +224,12 @@ config DMA_API_DEBUG_SG
          is technically out-of-spec.
 
          If unsure, say N.
+
+config DMA_MAP_BENCHMARK
+       bool "Enable benchmarking of streaming DMA mapping"
+       depends on DEBUG_FS
+       help
+         Provides /sys/kernel/debug/dma_map_benchmark that helps with testing
+         performance of dma_(un)map_page.
+
+         See tools/testing/selftests/dma/dma_map_benchmark.c
index cd1d863..0dd65ec 100644 (file)
@@ -9,3 +9,4 @@ obj-$(CONFIG_DMA_API_DEBUG)             += debug.o
 obj-$(CONFIG_SWIOTLB)                  += swiotlb.o
 obj-$(CONFIG_DMA_COHERENT_POOL)                += pool.o
 obj-$(CONFIG_DMA_REMAP)                        += remap.o
+obj-$(CONFIG_DMA_MAP_BENCHMARK)                += map_benchmark.o
index 16b95ff..3d63d91 100644 (file)
@@ -20,7 +20,7 @@
  *   coders, etc.
  *
  *   Such devices often require big memory buffers (a full HD frame
- *   is, for instance, more then 2 mega pixels large, i.e. more than 6
+ *   is, for instance, more than 2 mega pixels large, i.e. more than 6
  *   MB of memory), which makes mechanisms such as kmalloc() or
  *   alloc_page() ineffective.
  *
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
new file mode 100644 (file)
index 0000000..b1496e7
--- /dev/null
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Hisilicon Limited.
+ */
+
+#define pr_fmt(fmt)    KBUILD_MODNAME ": " fmt
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/timekeeping.h>
+
+#define DMA_MAP_BENCHMARK      _IOWR('d', 1, struct map_benchmark)
+#define DMA_MAP_MAX_THREADS    1024
+#define DMA_MAP_MAX_SECONDS    300
+
+#define DMA_MAP_BIDIRECTIONAL  0
+#define DMA_MAP_TO_DEVICE      1
+#define DMA_MAP_FROM_DEVICE    2
+
+struct map_benchmark {
+       __u64 avg_map_100ns; /* average map latency in 100ns */
+       __u64 map_stddev; /* standard deviation of map latency */
+       __u64 avg_unmap_100ns; /* as above */
+       __u64 unmap_stddev;
+       __u32 threads; /* how many threads will do map/unmap in parallel */
+       __u32 seconds; /* how long the test will last */
+       __s32 node; /* which numa node this benchmark will run on */
+       __u32 dma_bits; /* DMA addressing capability */
+       __u32 dma_dir; /* DMA data direction */
+       __u64 expansion[10];    /* For future use */
+};
+
+struct map_benchmark_data {
+       struct map_benchmark bparam;
+       struct device *dev;
+       struct dentry  *debugfs;
+       enum dma_data_direction dir;
+       atomic64_t sum_map_100ns;
+       atomic64_t sum_unmap_100ns;
+       atomic64_t sum_sq_map;
+       atomic64_t sum_sq_unmap;
+       atomic64_t loops;
+};
+
+static int map_benchmark_thread(void *data)
+{
+       void *buf;
+       dma_addr_t dma_addr;
+       struct map_benchmark_data *map = data;
+       int ret = 0;
+
+       buf = (void *)__get_free_page(GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       while (!kthread_should_stop()) {
+               u64 map_100ns, unmap_100ns, map_sq, unmap_sq;
+               ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
+               ktime_t map_delta, unmap_delta;
+
+               /*
+                * for a non-coherent device, if we don't dirty the buffer in
+                * the cache, this will give an underestimate of the
+                * real-world overhead of BIDIRECTIONAL or TO_DEVICE mappings;
+                * 0x66 means everything goes well! 0x66 is lucky.
+                */
+               if (map->dir != DMA_FROM_DEVICE)
+                       memset(buf, 0x66, PAGE_SIZE);
+
+               map_stime = ktime_get();
+               dma_addr = dma_map_single(map->dev, buf, PAGE_SIZE, map->dir);
+               if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
+                       pr_err("dma_map_single failed on %s\n",
+                               dev_name(map->dev));
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               map_etime = ktime_get();
+               map_delta = ktime_sub(map_etime, map_stime);
+
+               unmap_stime = ktime_get();
+               dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir);
+               unmap_etime = ktime_get();
+               unmap_delta = ktime_sub(unmap_etime, unmap_stime);
+
+               /* calculate sum and sum of squares */
+
+               map_100ns = div64_ul(map_delta, 100);
+               unmap_100ns = div64_ul(unmap_delta, 100);
+               map_sq = map_100ns * map_100ns;
+               unmap_sq = unmap_100ns * unmap_100ns;
+
+               atomic64_add(map_100ns, &map->sum_map_100ns);
+               atomic64_add(unmap_100ns, &map->sum_unmap_100ns);
+               atomic64_add(map_sq, &map->sum_sq_map);
+               atomic64_add(unmap_sq, &map->sum_sq_unmap);
+               atomic64_inc(&map->loops);
+       }
+
+out:
+       free_page((unsigned long)buf);
+       return ret;
+}
+
+static int do_map_benchmark(struct map_benchmark_data *map)
+{
+       struct task_struct **tsk;
+       int threads = map->bparam.threads;
+       int node = map->bparam.node;
+       const cpumask_t *cpu_mask = cpumask_of_node(node);
+       u64 loops;
+       int ret = 0;
+       int i;
+
+       tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL);
+       if (!tsk)
+               return -ENOMEM;
+
+       get_device(map->dev);
+
+       for (i = 0; i < threads; i++) {
+               tsk[i] = kthread_create_on_node(map_benchmark_thread, map,
+                               map->bparam.node, "dma-map-benchmark/%d", i);
+               if (IS_ERR(tsk[i])) {
+                       pr_err("create dma_map thread failed\n");
+                       ret = PTR_ERR(tsk[i]);
+                       goto out;
+               }
+
+               if (node != NUMA_NO_NODE)
+                       kthread_bind_mask(tsk[i], cpu_mask);
+       }
+
+       /* clear the old value in the previous benchmark */
+       atomic64_set(&map->sum_map_100ns, 0);
+       atomic64_set(&map->sum_unmap_100ns, 0);
+       atomic64_set(&map->sum_sq_map, 0);
+       atomic64_set(&map->sum_sq_unmap, 0);
+       atomic64_set(&map->loops, 0);
+
+       for (i = 0; i < threads; i++)
+               wake_up_process(tsk[i]);
+
+       msleep_interruptible(map->bparam.seconds * 1000);
+
+       /* wait for the completion of benchmark threads */
+       for (i = 0; i < threads; i++) {
+               ret = kthread_stop(tsk[i]);
+               if (ret)
+                       goto out;
+       }
+
+       loops = atomic64_read(&map->loops);
+       if (likely(loops > 0)) {
+               u64 map_variance, unmap_variance;
+               u64 sum_map = atomic64_read(&map->sum_map_100ns);
+               u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns);
+               u64 sum_sq_map = atomic64_read(&map->sum_sq_map);
+               u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap);
+
+               /* average latency */
+               map->bparam.avg_map_100ns = div64_u64(sum_map, loops);
+               map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops);
+
+               /* standard deviation of latency */
+               map_variance = div64_u64(sum_sq_map, loops) -
+                               map->bparam.avg_map_100ns *
+                               map->bparam.avg_map_100ns;
+               unmap_variance = div64_u64(sum_sq_unmap, loops) -
+                               map->bparam.avg_unmap_100ns *
+                               map->bparam.avg_unmap_100ns;
+               map->bparam.map_stddev = int_sqrt64(map_variance);
+               map->bparam.unmap_stddev = int_sqrt64(unmap_variance);
+       }
+
+out:
+       put_device(map->dev);
+       kfree(tsk);
+       return ret;
+}
+
+static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
+               unsigned long arg)
+{
+       struct map_benchmark_data *map = file->private_data;
+       void __user *argp = (void __user *)arg;
+       u64 old_dma_mask;
+
+       int ret;
+
+       if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
+               return -EFAULT;
+
+       switch (cmd) {
+       case DMA_MAP_BENCHMARK:
+               if (map->bparam.threads == 0 ||
+                   map->bparam.threads > DMA_MAP_MAX_THREADS) {
+                       pr_err("invalid thread number\n");
+                       return -EINVAL;
+               }
+
+               if (map->bparam.seconds == 0 ||
+                   map->bparam.seconds > DMA_MAP_MAX_SECONDS) {
+                       pr_err("invalid duration seconds\n");
+                       return -EINVAL;
+               }
+
+               if (map->bparam.node != NUMA_NO_NODE &&
+                   !node_possible(map->bparam.node)) {
+                       pr_err("invalid numa node\n");
+                       return -EINVAL;
+               }
+
+               switch (map->bparam.dma_dir) {
+               case DMA_MAP_BIDIRECTIONAL:
+                       map->dir = DMA_BIDIRECTIONAL;
+                       break;
+               case DMA_MAP_FROM_DEVICE:
+                       map->dir = DMA_FROM_DEVICE;
+                       break;
+               case DMA_MAP_TO_DEVICE:
+                       map->dir = DMA_TO_DEVICE;
+                       break;
+               default:
+                       pr_err("invalid DMA direction\n");
+                       return -EINVAL;
+               }
+
+               old_dma_mask = dma_get_mask(map->dev);
+
+               ret = dma_set_mask(map->dev,
+                                  DMA_BIT_MASK(map->bparam.dma_bits));
+               if (ret) {
+                       pr_err("failed to set dma_mask on device %s\n",
+                               dev_name(map->dev));
+                       return -EINVAL;
+               }
+
+               ret = do_map_benchmark(map);
+
+               /*
+                * Restore the original dma_mask, as many devices' dma_mask is
+                * set by architectures, ACPI or buses. When we bind these
+                * devices back to their original drivers, those drivers
+                * shouldn't see a dma_mask changed by the benchmark.
+                */
+               dma_set_mask(map->dev, old_dma_mask);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (copy_to_user(argp, &map->bparam, sizeof(map->bparam)))
+               return -EFAULT;
+
+       return ret;
+}
+
+static const struct file_operations map_benchmark_fops = {
+       .open                   = simple_open,
+       .unlocked_ioctl         = map_benchmark_ioctl,
+};
+
+static void map_benchmark_remove_debugfs(void *data)
+{
+       struct map_benchmark_data *map = (struct map_benchmark_data *)data;
+
+       debugfs_remove(map->debugfs);
+}
+
+static int __map_benchmark_probe(struct device *dev)
+{
+       struct dentry *entry;
+       struct map_benchmark_data *map;
+       int ret;
+
+       map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL);
+       if (!map)
+               return -ENOMEM;
+       map->dev = dev;
+
+       ret = devm_add_action(dev, map_benchmark_remove_debugfs, map);
+       if (ret) {
+               pr_err("Can't add debugfs remove action\n");
+               return ret;
+       }
+
+       /*
+        * We only permit one device to be bound to this driver; a
+        * second probe will fail.
+        */
+       entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map,
+                       &map_benchmark_fops);
+       if (IS_ERR(entry))
+               return PTR_ERR(entry);
+       map->debugfs = entry;
+
+       return 0;
+}
+
+static int map_benchmark_platform_probe(struct platform_device *pdev)
+{
+       return __map_benchmark_probe(&pdev->dev);
+}
+
+static struct platform_driver map_benchmark_platform_driver = {
+       .driver         = {
+               .name   = "dma_map_benchmark",
+       },
+       .probe = map_benchmark_platform_probe,
+};
+
+static int
+map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       return __map_benchmark_probe(&pdev->dev);
+}
+
+static struct pci_driver map_benchmark_pci_driver = {
+       .name   = "dma_map_benchmark",
+       .probe  = map_benchmark_pci_probe,
+};
+
+static int __init map_benchmark_init(void)
+{
+       int ret;
+
+       ret = pci_register_driver(&map_benchmark_pci_driver);
+       if (ret)
+               return ret;
+
+       ret = platform_driver_register(&map_benchmark_platform_driver);
+       if (ret) {
+               pci_unregister_driver(&map_benchmark_pci_driver);
+               return ret;
+       }
+
+       return 0;
+}
+
+static void __exit map_benchmark_cleanup(void)
+{
+       platform_driver_unregister(&map_benchmark_platform_driver);
+       pci_unregister_driver(&map_benchmark_pci_driver);
+}
+
+module_init(map_benchmark_init);
+module_exit(map_benchmark_cleanup);
+
+MODULE_AUTHOR("Barry Song <song.bao.hua@hisilicon.com>");
+MODULE_DESCRIPTION("dma_map benchmark driver");
+MODULE_LICENSE("GPL");
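
The matching user-space side fills a struct map_benchmark and fires the ioctl at the debugfs node; a trimmed sketch in the spirit of the selftest referenced by the Kconfig help (tools/testing/selftests/dma/dma_map_benchmark.c), with option parsing and error checks omitted:

    /* Hedged sketch of the user-space driver for the benchmark above. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>

    struct map_benchmark {          /* must match the kernel layout above */
            __u64 avg_map_100ns;
            __u64 map_stddev;
            __u64 avg_unmap_100ns;
            __u64 unmap_stddev;
            __u32 threads;
            __u32 seconds;
            __s32 node;
            __u32 dma_bits;
            __u32 dma_dir;
            __u64 expansion[10];
    };

    #define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark)

    int main(void)
    {
            struct map_benchmark map = {
                    .threads = 4, .seconds = 10, .node = -1 /* NUMA_NO_NODE */,
                    .dma_bits = 32, .dma_dir = 0 /* DMA_MAP_BIDIRECTIONAL */,
            };
            int fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);

            ioctl(fd, DMA_MAP_BENCHMARK, &map);
            printf("avg map %llu stddev %llu, avg unmap %llu stddev %llu (x100ns)\n",
                   (unsigned long long)map.avg_map_100ns,
                   (unsigned long long)map.map_stddev,
                   (unsigned long long)map.avg_unmap_100ns,
                   (unsigned long long)map.unmap_stddev);
            return 0;
    }
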
index 51bb8fa..f87a89d 100644 (file)
@@ -149,7 +149,8 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
        if (WARN_ON_ONCE(!dev->dma_mask))
                return DMA_MAPPING_ERROR;
 
-       if (dma_map_direct(dev, ops))
+       if (dma_map_direct(dev, ops) ||
+           arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size))
                addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
        else
                addr = ops->map_page(dev, page, offset, size, dir, attrs);
@@ -165,7 +166,8 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
        const struct dma_map_ops *ops = get_dma_ops(dev);
 
        BUG_ON(!valid_dma_direction(dir));
-       if (dma_map_direct(dev, ops))
+       if (dma_map_direct(dev, ops) ||
+           arch_dma_unmap_page_direct(dev, addr + size))
                dma_direct_unmap_page(dev, addr, size, dir, attrs);
        else if (ops->unmap_page)
                ops->unmap_page(dev, addr, size, dir, attrs);
@@ -188,7 +190,8 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
        if (WARN_ON_ONCE(!dev->dma_mask))
                return 0;
 
-       if (dma_map_direct(dev, ops))
+       if (dma_map_direct(dev, ops) ||
+           arch_dma_map_sg_direct(dev, sg, nents))
                ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
        else
                ents = ops->map_sg(dev, sg, nents, dir, attrs);
@@ -207,7 +210,8 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
 
        BUG_ON(!valid_dma_direction(dir));
        debug_dma_unmap_sg(dev, sg, nents, dir);
-       if (dma_map_direct(dev, ops))
+       if (dma_map_direct(dev, ops) ||
+           arch_dma_unmap_sg_direct(dev, sg, nents))
                dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
        else if (ops->unmap_sg)
                ops->unmap_sg(dev, sg, nents, dir, attrs);
index d4637f7..5f84e6c 100644 (file)
@@ -38,9 +38,6 @@ static void __init dma_atomic_pool_debugfs_init(void)
        struct dentry *root;
 
        root = debugfs_create_dir("dma_pools", NULL);
-       if (IS_ERR_OR_NULL(root))
-               return;
-
        debugfs_create_ulong("pool_size_dma", 0400, root, &pool_size_dma);
        debugfs_create_ulong("pool_size_dma32", 0400, root, &pool_size_dma32);
        debugfs_create_ulong("pool_size_kernel", 0400, root, &pool_size_kernel);
index 41906a5..37720a6 100644 (file)
@@ -225,8 +225,8 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
                if (!s)
                        continue;
 
-               /* Clear the KASAN shadow of the stack. */
-               kasan_unpoison_shadow(s->addr, THREAD_SIZE);
+               /* Mark stack accessible for KASAN. */
+               kasan_unpoison_range(s->addr, THREAD_SIZE);
 
                /* Clear stale pointers from reused stack. */
                memset(s->addr, 0, THREAD_SIZE);
index b0ad37b..6931f0c 100644 (file)
@@ -53,6 +53,7 @@ struct sugov_cpu {
        unsigned int            iowait_boost;
        u64                     last_update;
 
+       unsigned long           util;
        unsigned long           bw_dl;
        unsigned long           max;
 
@@ -276,16 +277,15 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
        return min(max, util);
 }
 
-static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
+static void sugov_get_util(struct sugov_cpu *sg_cpu)
 {
        struct rq *rq = cpu_rq(sg_cpu->cpu);
-       unsigned long util = cpu_util_cfs(rq);
        unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu);
 
        sg_cpu->max = max;
        sg_cpu->bw_dl = cpu_bw_dl(rq);
-
-       return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL);
+       sg_cpu->util = schedutil_cpu_util(sg_cpu->cpu, cpu_util_cfs(rq), max,
+                                         FREQUENCY_UTIL, NULL);
 }
 
 /**
@@ -362,8 +362,6 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
  * sugov_iowait_apply() - Apply the IO boost to a CPU.
  * @sg_cpu: the sugov data for the cpu to boost
  * @time: the update time from the caller
- * @util: the utilization to (eventually) boost
- * @max: the maximum value the utilization can be boosted to
  *
  * A CPU running a task which was woken up after an IO operation can have its
  * utilization boosted to speed up the completion of those IO operations.
@@ -377,18 +375,17 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
  * This mechanism is designed to boost tasks that frequently wait on IO,
  * while being more conservative on tasks that only perform sporadic IO
  * operations.
  */
-static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
-                                       unsigned long util, unsigned long max)
+static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
 {
        unsigned long boost;
 
        /* No boost currently required */
        if (!sg_cpu->iowait_boost)
-               return util;
+               return;
 
        /* Reset boost if the CPU appears to have been idle enough */
        if (sugov_iowait_reset(sg_cpu, time, false))
-               return util;
+               return;
 
        if (!sg_cpu->iowait_boost_pending) {
                /*
@@ -397,18 +394,19 @@ static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
                sg_cpu->iowait_boost >>= 1;
                if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
                        sg_cpu->iowait_boost = 0;
-                       return util;
+                       return;
                }
        }
 
        sg_cpu->iowait_boost_pending = false;
 
        /*
-        * @util is already in capacity scale; convert iowait_boost
+        * sg_cpu->util is already in capacity scale; convert iowait_boost
         * into the same scale so we can compare.
         */
-       boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
-       return max(boost, util);
+       boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
+       if (sg_cpu->util < boost)
+               sg_cpu->util = boost;
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -434,14 +432,10 @@ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_p
                sg_policy->limits_changed = true;
 }
 
-static void sugov_update_single(struct update_util_data *hook, u64 time,
-                               unsigned int flags)
+static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
+                                             u64 time, unsigned int flags)
 {
-       struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
-       unsigned long util, max;
-       unsigned int next_f;
-       unsigned int cached_freq = sg_policy->cached_raw_freq;
 
        sugov_iowait_boost(sg_cpu, time, flags);
        sg_cpu->last_update = time;
@@ -449,12 +443,26 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
        ignore_dl_rate_limit(sg_cpu, sg_policy);
 
        if (!sugov_should_update_freq(sg_policy, time))
+               return false;
+
+       sugov_get_util(sg_cpu);
+       sugov_iowait_apply(sg_cpu, time);
+
+       return true;
+}
+
+static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
+                                    unsigned int flags)
+{
+       struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+       struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+       unsigned int cached_freq = sg_policy->cached_raw_freq;
+       unsigned int next_f;
+
+       if (!sugov_update_single_common(sg_cpu, time, flags))
                return;
 
-       util = sugov_get_util(sg_cpu);
-       max = sg_cpu->max;
-       util = sugov_iowait_apply(sg_cpu, time, util, max);
-       next_f = get_next_freq(sg_policy, util, max);
+       next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
        /*
         * Do not reduce the frequency if the CPU has not been idle
         * recently, as the reduction is likely to be premature then.
@@ -480,6 +488,38 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
        }
 }
 
+static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
+                                    unsigned int flags)
+{
+       struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+       unsigned long prev_util = sg_cpu->util;
+
+       /*
+        * Fall back to the "frequency" path if frequency invariance is not
+        * supported, because the direct mapping between the utilization and
+        * the performance levels depends on the frequency invariance.
+        */
+       if (!arch_scale_freq_invariant()) {
+               sugov_update_single_freq(hook, time, flags);
+               return;
+       }
+
+       if (!sugov_update_single_common(sg_cpu, time, flags))
+               return;
+
+       /*
+        * Do not reduce the target performance level if the CPU has not been
+        * idle recently, as the reduction is likely to be premature then.
+        */
+       if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
+               sg_cpu->util = prev_util;
+
+       cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
+                                  map_util_perf(sg_cpu->util), sg_cpu->max);
+
+       sg_cpu->sg_policy->last_freq_update_time = time;
+}
+
 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 {
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
@@ -491,9 +531,10 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
                struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
                unsigned long j_util, j_max;
 
-               j_util = sugov_get_util(j_sg_cpu);
+               sugov_get_util(j_sg_cpu);
+               sugov_iowait_apply(j_sg_cpu, time);
+               j_util = j_sg_cpu->util;
                j_max = j_sg_cpu->max;
-               j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
 
                if (j_util * max > j_max * util) {
                        util = j_util;
@@ -817,6 +858,7 @@ static void sugov_exit(struct cpufreq_policy *policy)
 static int sugov_start(struct cpufreq_policy *policy)
 {
        struct sugov_policy *sg_policy = policy->governor_data;
+       void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
        unsigned int cpu;
 
        sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
@@ -836,13 +878,17 @@ static int sugov_start(struct cpufreq_policy *policy)
                sg_cpu->sg_policy               = sg_policy;
        }
 
+       if (policy_is_shared(policy))
+               uu = sugov_update_shared;
+       else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
+               uu = sugov_update_single_perf;
+       else
+               uu = sugov_update_single_freq;
+
        for_each_cpu(cpu, policy->cpus) {
                struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
 
-               cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
-                                            policy_is_shared(policy) ?
-                                                       sugov_update_shared :
-                                                       sugov_update_single);
+               cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu);
        }
        return 0;
 }
index 01f5d30..183cc6a 100644 (file)
@@ -37,6 +37,17 @@ void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue
 }
 EXPORT_SYMBOL(add_wait_queue_exclusive);
 
+void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+{
+       unsigned long flags;
+
+       wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY;
+       spin_lock_irqsave(&wq_head->lock, flags);
+       __add_wait_queue(wq_head, wq_entry);
+       spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL_GPL(add_wait_queue_priority);
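
A sketch of a waiter that must observe events ahead of all non-exclusive waiters on the same head; the callback and names below are placeholders, not taken from this series:

    /* Hedged sketch: registering a priority waiter. */
    #include <linux/wait.h>

    static int my_wakeup_fn(struct wait_queue_entry *wq, unsigned int mode,
                            int flags, void *key)
    {
            /* Runs before any non-priority waiter on the same head; a
             * non-zero return from an exclusive entry can end the walk. */
            return 1;
    }

    static wait_queue_entry_t my_wait;

    static void register_priority_waiter(struct wait_queue_head *wqh)
    {
            init_waitqueue_func_entry(&my_wait, my_wakeup_fn);
            add_wait_queue_priority(wqh, &my_wait); /* sets EXCLUSIVE|PRIORITY */
    }
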
+
 void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
        unsigned long flags;
@@ -57,7 +68,11 @@ EXPORT_SYMBOL(remove_wait_queue);
 /*
  * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
  * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
- * number) then we wake all the non-exclusive tasks and one exclusive task.
+ * number) then we wake that number of exclusive tasks, and potentially all
+ * the non-exclusive tasks. Normally, exclusive tasks will be at the end of
+ * the list and any non-exclusive tasks will be woken first. A priority task
+ * may be at the head of the list, and can consume the event without any other
+ * tasks being woken.
  *
  * There are circumstances in which we can try to wake a task which has already
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
index f27ac94..19aa806 100644 (file)
@@ -68,6 +68,8 @@ COND_SYSCALL(epoll_create1);
 COND_SYSCALL(epoll_ctl);
 COND_SYSCALL(epoll_pwait);
 COND_SYSCALL_COMPAT(epoll_pwait);
+COND_SYSCALL(epoll_pwait2);
+COND_SYSCALL_COMPAT(epoll_pwait2);
 
 /* fs/fcntl.c */
 
index b46a9fd..4680633 100644 (file)
@@ -686,6 +686,9 @@ config GENERIC_LIB_CMPDI2
 config GENERIC_LIB_UCMPDI2
        bool
 
+config GENERIC_LIB_DEVMEM_IS_ALLOWED
+       bool
+
 config PLDMFW
        bool
        default n
index 7d7097c..e6e58b2 100644 (file)
@@ -1676,7 +1676,7 @@ config ARCH_HAS_DEVMEM_IS_ALLOWED
 config STRICT_DEVMEM
        bool "Filter access to /dev/mem"
        depends on MMU && DEVMEM
-       depends on ARCH_HAS_DEVMEM_IS_ALLOWED
+       depends on ARCH_HAS_DEVMEM_IS_ALLOWED || GENERIC_LIB_DEVMEM_IS_ALLOWED
        default y if PPC || X86 || ARM64
        help
          If this option is disabled, you allow userspace (root) access to all
index 8fb0970..f5fa4ba 100644 (file)
@@ -6,7 +6,10 @@ config HAVE_ARCH_KASAN
 config HAVE_ARCH_KASAN_SW_TAGS
        bool
 
-config HAVE_ARCH_KASAN_VMALLOC
+config HAVE_ARCH_KASAN_HW_TAGS
+       bool
+
+config HAVE_ARCH_KASAN_VMALLOC
        bool
 
 config CC_HAS_KASAN_GENERIC
@@ -15,15 +18,20 @@ config CC_HAS_KASAN_GENERIC
 config CC_HAS_KASAN_SW_TAGS
        def_bool $(cc-option, -fsanitize=kernel-hwaddress)
 
+# This option is only required for software KASAN modes.
+# Old GCC versions don't have proper support for no_sanitize_address.
+# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89124 for details.
 config CC_HAS_WORKING_NOSANITIZE_ADDRESS
        def_bool !CC_IS_GCC || GCC_VERSION >= 80300
 
 menuconfig KASAN
        bool "KASAN: runtime memory debugger"
-       depends on (HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC) || \
-                  (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)
+       depends on (((HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC) || \
+                    (HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS)) && \
+                   CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \
+                  HAVE_ARCH_KASAN_HW_TAGS
        depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
-       depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS
+       select STACKDEPOT
        help
          Enables KASAN (KernelAddressSANitizer) - runtime memory debugger,
          designed to find out-of-bounds accesses and use-after-free bugs.
@@ -35,21 +43,24 @@ choice
        prompt "KASAN mode"
        default KASAN_GENERIC
        help
-         KASAN has two modes: generic KASAN (similar to userspace ASan,
-         x86_64/arm64/xtensa, enabled with CONFIG_KASAN_GENERIC) and
-         software tag-based KASAN (a version based on software memory
-         tagging, arm64 only, similar to userspace HWASan, enabled with
-         CONFIG_KASAN_SW_TAGS).
+         KASAN has three modes:
+         1. generic KASAN (similar to userspace ASan,
+            x86_64/arm64/xtensa, enabled with CONFIG_KASAN_GENERIC),
+         2. software tag-based KASAN (arm64 only, based on software
+            memory tagging (similar to userspace HWASan), enabled with
+            CONFIG_KASAN_SW_TAGS), and
+         3. hardware tag-based KASAN (arm64 only, based on hardware
+            memory tagging, enabled with CONFIG_KASAN_HW_TAGS).
+
+         All KASAN modes are strictly debugging features.
 
-         Both generic and tag-based KASAN are strictly debugging features.
+         For better error reports, enable CONFIG_STACKTRACE.
 
 config KASAN_GENERIC
        bool "Generic mode"
        depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC
-       depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
        select SLUB_DEBUG if SLUB
        select CONSTRUCTORS
-       select STACKDEPOT
        help
          Enables generic KASAN mode.
 
@@ -62,23 +73,22 @@ config KASAN_GENERIC
          and introduces an overhead of ~x1.5 for the rest of the allocations.
          The performance slowdown is ~x3.
 
-         For better error detection enable CONFIG_STACKTRACE.
-
          Currently CONFIG_KASAN_GENERIC doesn't work with CONFIG_DEBUG_SLAB
          (the resulting kernel does not boot).
 
 config KASAN_SW_TAGS
        bool "Software tag-based mode"
        depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS
-       depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
        select SLUB_DEBUG if SLUB
        select CONSTRUCTORS
-       select STACKDEPOT
        help
          Enables software tag-based KASAN mode.
 
-         This mode requires Top Byte Ignore support by the CPU and therefore
-         is only supported for arm64. This mode requires Clang.
+         This mode requires software memory tagging support in the form of
+         HWASan-like compiler instrumentation.
+
+         Currently this mode is only implemented for arm64 CPUs and relies on
+         Top Byte Ignore. This mode requires Clang.
 
          This mode consumes about 1/16th of available memory at kernel start
          and introduces an overhead of ~20% for the rest of the allocations.
@@ -86,15 +96,27 @@ config KASAN_SW_TAGS
          casting and comparison, as it embeds tags into the top byte of each
          pointer.
 
-         For better error detection enable CONFIG_STACKTRACE.
-
          Currently CONFIG_KASAN_SW_TAGS doesn't work with CONFIG_DEBUG_SLAB
          (the resulting kernel does not boot).
 
+config KASAN_HW_TAGS
+       bool "Hardware tag-based mode"
+       depends on HAVE_ARCH_KASAN_HW_TAGS
+       depends on SLUB
+       help
+         Enables hardware tag-based KASAN mode.
+
+         This mode requires hardware memory tagging support, and can be used
+         by any architecture that provides it.
+
+         Currently this mode is only implemented for arm64 CPUs starting from
+         ARMv8.5 and relies on Memory Tagging Extension and Top Byte Ignore.
+
 endchoice
 
 choice
        prompt "Instrumentation type"
+       depends on KASAN_GENERIC || KASAN_SW_TAGS
        default KASAN_OUTLINE
 
 config KASAN_OUTLINE
@@ -118,6 +140,7 @@ endchoice
 
 config KASAN_STACK_ENABLE
        bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST
+       depends on KASAN_GENERIC || KASAN_SW_TAGS
        help
          The LLVM stack address sanitizer has a known problem that
          causes excessive stack usage in a lot of functions, see
@@ -146,7 +169,7 @@ config KASAN_SW_TAGS_IDENTIFY
 
 config KASAN_VMALLOC
        bool "Back mappings in vmalloc space with real shadow memory"
-       depends on HAVE_ARCH_KASAN_VMALLOC
+       depends on KASAN_GENERIC && HAVE_ARCH_KASAN_VMALLOC
        help
          By default, the shadow region for vmalloc space is the read-only
          zero page. This means that KASAN cannot detect errors involving
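
Taken together, the reworked dependencies allow a fragment like the following illustrative .config to enable the new mode (arm64 with MTE assumed); the instrumentation-type and stack-instrumentation choices no longer apply to it:

CONFIG_KASAN=y
CONFIG_KASAN_HW_TAGS=y
# Better error reports, per the updated help text:
CONFIG_STACKTRACE=y
# KASAN_OUTLINE/KASAN_INLINE and KASAN_STACK_ENABLE now depend on
# KASAN_GENERIC || KASAN_SW_TAGS, so they are omitted here.
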
index 8598e87..afeff05 100644 (file)
@@ -354,3 +354,5 @@ obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
 obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
 obj-$(CONFIG_BITS_TEST) += test_bits.o
 obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o
+
+obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
diff --git a/lib/devmem_is_allowed.c b/lib/devmem_is_allowed.c
new file mode 100644 (file)
index 0000000..c0d67c5
--- /dev/null
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * A generic version of devmem_is_allowed.
+ *
+ * Based on arch/arm64/mm/mmap.c
+ *
+ * Copyright (C) 2020 Google, Inc.
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#include <linux/mm.h>
+#include <linux/ioport.h>
+
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.  We mimic x86 here by
+ * disallowing access to system RAM as well as device-exclusive MMIO regions.
+ * This effectively disables read()/write() on /dev/mem.
+ */
+int devmem_is_allowed(unsigned long pfn)
+{
+       if (iomem_is_exclusive(pfn << PAGE_SHIFT))
+               return 0;
+       if (!page_is_ram(pfn))
+               return 1;
+       return 0;
+}
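
The return convention is 1 for "allowed" and 0 for "denied". A caller-side sketch, using a hypothetical helper modelled on how the /dev/mem code validates a range page by page:

/* Hypothetical caller: a physical range is usable only if every page
 * in it passes devmem_is_allowed() above. */
static int demo_range_is_allowed(unsigned long start_pfn,
                                 unsigned long nr_pages)
{
        unsigned long pfn;

        for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
                if (!devmem_is_allowed(pfn))
                        return 0;
        return 1;
}
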
index 662f862..2947274 100644 (file)
@@ -25,7 +25,7 @@
 
 #include "../mm/kasan/kasan.h"
 
-#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_SHADOW_SCALE_SIZE)
+#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE)
 
 /*
  * We assign some test results to these globals to make sure the tests
index 62a8785..3b4cc77 100644 (file)
@@ -15,7 +15,7 @@
 
 #include "../mm/kasan/kasan.h"
 
-#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_SHADOW_SCALE_SIZE)
+#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE)
 
 static noinline void __init copy_user_test(void)
 {
index 4275c25..f730605 100644 (file)
@@ -713,7 +713,7 @@ config ZSMALLOC_STAT
        select DEBUG_FS
        help
          This option enables code in the zsmalloc to collect various
-         statistics about whats happening in zsmalloc and exports that
+         statistics about what's happening in zsmalloc and exports that
          information to userspace via debugfs.
          If unsure, say N.
 
index 7a49bac..5c9d564 100644 (file)
@@ -2453,6 +2453,9 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 
        if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
                return 0;
+       if (unlikely(!iov_iter_count(iter)))
+               return 0;
+
        iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
 
        if (nr_pages > ARRAY_SIZE(pages_onstack))
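
The added iov_iter_count() check makes the zero-length case explicit. Illustratively (fd and buf are hypothetical), a readv(2) whose vectors sum to zero bytes now returns 0 up front instead of entering the page-cache path:

#include <sys/uio.h>

/* Userspace illustration; expected result is 0 with no I/O performed. */
static ssize_t demo_zero_len_read(int fd, char *buf)
{
        struct iovec iov = { .iov_base = buf, .iov_len = 0 };

        return readv(fd, &iov, 1);
}
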
index 370d970..9fe39a6 100644 (file)
@@ -6,12 +6,15 @@ KCOV_INSTRUMENT := n
 # Disable ftrace to avoid recursion.
 CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_generic.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_generic_report.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_quarantine.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_report.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_tags.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_tags_report.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_report_generic.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_report_hw_tags.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_report_sw_tags.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_shadow.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_hw_tags.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_sw_tags.o = $(CC_FLAGS_FTRACE)
 
 # Function splitter causes unnecessary splits in __asan_load1/__asan_store1
 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
@@ -22,13 +25,17 @@ CC_FLAGS_KASAN_RUNTIME += -DDISABLE_BRANCH_PROFILING
 
 CFLAGS_common.o := $(CC_FLAGS_KASAN_RUNTIME)
 CFLAGS_generic.o := $(CC_FLAGS_KASAN_RUNTIME)
-CFLAGS_generic_report.o := $(CC_FLAGS_KASAN_RUNTIME)
 CFLAGS_init.o := $(CC_FLAGS_KASAN_RUNTIME)
 CFLAGS_quarantine.o := $(CC_FLAGS_KASAN_RUNTIME)
 CFLAGS_report.o := $(CC_FLAGS_KASAN_RUNTIME)
-CFLAGS_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
-CFLAGS_tags_report.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_report_generic.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_report_hw_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_report_sw_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_shadow.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_hw_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
+CFLAGS_sw_tags.o := $(CC_FLAGS_KASAN_RUNTIME)
 
-obj-$(CONFIG_KASAN) := common.o init.o report.o
-obj-$(CONFIG_KASAN_GENERIC) += generic.o generic_report.o quarantine.o
-obj-$(CONFIG_KASAN_SW_TAGS) += tags.o tags_report.o
+obj-$(CONFIG_KASAN) := common.o report.o
+obj-$(CONFIG_KASAN_GENERIC) += init.o generic.o report_generic.o shadow.o quarantine.o
+obj-$(CONFIG_KASAN_HW_TAGS) += hw_tags.o report_hw_tags.o
+obj-$(CONFIG_KASAN_SW_TAGS) += init.o report_sw_tags.o shadow.o sw_tags.o
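
For reference, the per-mode object lists implied by the rules above:

# CONFIG_KASAN_GENERIC: common.o report.o init.o generic.o
#                       report_generic.o shadow.o quarantine.o
# CONFIG_KASAN_SW_TAGS: common.o report.o init.o report_sw_tags.o
#                       shadow.o sw_tags.o
# CONFIG_KASAN_HW_TAGS: common.o report.o hw_tags.o report_hw_tags.o
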
index 950fd37..b251676 100644 (file)
@@ -1,24 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * This file contains common generic and tag-based KASAN code.
+ * This file contains common KASAN code.
  *
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
  * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
  *
  * Some code borrowed from https://github.com/xairy/kasan-prototype by
  *        Andrey Konovalov <andreyknvl@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/kasan.h>
 #include <linux/kernel.h>
-#include <linux/kmemleak.h>
 #include <linux/linkage.h>
 #include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/stacktrace.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include <linux/vmalloc.h>
 #include <linux/bug.h>
 
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
 #include "kasan.h"
 #include "../slab.h"
 
@@ -56,6 +46,7 @@ void kasan_set_track(struct kasan_track *track, gfp_t flags)
        track->stack = kasan_save_stack(flags);
 }
 
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 void kasan_enable_current(void)
 {
        current->kasan_depth++;
@@ -65,106 +56,20 @@ void kasan_disable_current(void)
 {
        current->kasan_depth--;
 }
+#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
 
-bool __kasan_check_read(const volatile void *p, unsigned int size)
-{
-       return check_memory_region((unsigned long)p, size, false, _RET_IP_);
-}
-EXPORT_SYMBOL(__kasan_check_read);
-
-bool __kasan_check_write(const volatile void *p, unsigned int size)
-{
-       return check_memory_region((unsigned long)p, size, true, _RET_IP_);
-}
-EXPORT_SYMBOL(__kasan_check_write);
-
-#undef memset
-void *memset(void *addr, int c, size_t len)
-{
-       if (!check_memory_region((unsigned long)addr, len, true, _RET_IP_))
-               return NULL;
-
-       return __memset(addr, c, len);
-}
-
-#ifdef __HAVE_ARCH_MEMMOVE
-#undef memmove
-void *memmove(void *dest, const void *src, size_t len)
-{
-       if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
-           !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
-               return NULL;
-
-       return __memmove(dest, src, len);
-}
-#endif
-
-#undef memcpy
-void *memcpy(void *dest, const void *src, size_t len)
+void __kasan_unpoison_range(const void *address, size_t size)
 {
-       if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
-           !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
-               return NULL;
-
-       return __memcpy(dest, src, len);
-}
-
-/*
- * Poisons the shadow memory for 'size' bytes starting from 'addr'.
- * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE.
- */
-void kasan_poison_shadow(const void *address, size_t size, u8 value)
-{
-       void *shadow_start, *shadow_end;
-
-       /*
-        * Perform shadow offset calculation based on untagged address, as
-        * some of the callers (e.g. kasan_poison_object_data) pass tagged
-        * addresses to this function.
-        */
-       address = reset_tag(address);
-
-       shadow_start = kasan_mem_to_shadow(address);
-       shadow_end = kasan_mem_to_shadow(address + size);
-
-       __memset(shadow_start, value, shadow_end - shadow_start);
-}
-
-void kasan_unpoison_shadow(const void *address, size_t size)
-{
-       u8 tag = get_tag(address);
-
-       /*
-        * Perform shadow offset calculation based on untagged address, as
-        * some of the callers (e.g. kasan_unpoison_object_data) pass tagged
-        * addresses to this function.
-        */
-       address = reset_tag(address);
-
-       kasan_poison_shadow(address, size, tag);
-
-       if (size & KASAN_SHADOW_MASK) {
-               u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size);
-
-               if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
-                       *shadow = tag;
-               else
-                       *shadow = size & KASAN_SHADOW_MASK;
-       }
-}
-
-static void __kasan_unpoison_stack(struct task_struct *task, const void *sp)
-{
-       void *base = task_stack_page(task);
-       size_t size = sp - base;
-
-       kasan_unpoison_shadow(base, size);
+       unpoison_range(address, size);
 }
 
+#if CONFIG_KASAN_STACK
 /* Unpoison the entire stack for a task. */
 void kasan_unpoison_task_stack(struct task_struct *task)
 {
-       __kasan_unpoison_stack(task, task_stack_page(task) + THREAD_SIZE);
+       void *base = task_stack_page(task);
+
+       unpoison_range(base, THREAD_SIZE);
 }
 
 /* Unpoison the stack for the current task beyond a watermark sp value. */
@@ -177,10 +82,22 @@ asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
         */
        void *base = (void *)((unsigned long)watermark & ~(THREAD_SIZE - 1));
 
-       kasan_unpoison_shadow(base, watermark - base);
+       unpoison_range(base, watermark - base);
+}
+#endif /* CONFIG_KASAN_STACK */
+
+/*
+ * Only allow cache merging when stack collection is disabled and no metadata
+ * is present.
+ */
+slab_flags_t __kasan_never_merge(void)
+{
+       if (kasan_stack_collection_enabled())
+               return SLAB_KASAN;
+       return 0;
 }
 
-void kasan_alloc_pages(struct page *page, unsigned int order)
+void __kasan_alloc_pages(struct page *page, unsigned int order)
 {
        u8 tag;
        unsigned long i;
@@ -191,13 +108,13 @@ void kasan_alloc_pages(struct page *page, unsigned int order)
        tag = random_tag();
        for (i = 0; i < (1 << order); i++)
                page_kasan_tag_set(page + i, tag);
-       kasan_unpoison_shadow(page_address(page), PAGE_SIZE << order);
+       unpoison_range(page_address(page), PAGE_SIZE << order);
 }
 
-void kasan_free_pages(struct page *page, unsigned int order)
+void __kasan_free_pages(struct page *page, unsigned int order)
 {
        if (likely(!PageHighMem(page)))
-               kasan_poison_shadow(page_address(page),
+               poison_range(page_address(page),
                                PAGE_SIZE << order,
                                KASAN_FREE_PAGE);
 }
@@ -208,9 +125,6 @@ void kasan_free_pages(struct page *page, unsigned int order)
  */
 static inline unsigned int optimal_redzone(unsigned int object_size)
 {
-       if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
-               return 0;
-
        return
                object_size <= 64        - 16   ? 16 :
                object_size <= 128       - 32   ? 32 :
@@ -221,88 +135,129 @@ static inline unsigned int optimal_redzone(unsigned int object_size)
                object_size <= (1 << 16) - 1024 ? 1024 : 2048;
 }
 
-void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
-                       slab_flags_t *flags)
+void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
+                         slab_flags_t *flags)
 {
-       unsigned int orig_size = *size;
-       unsigned int redzone_size;
-       int redzone_adjust;
+       unsigned int ok_size;
+       unsigned int optimal_size;
 
-       /* Add alloc meta. */
-       cache->kasan_info.alloc_meta_offset = *size;
-       *size += sizeof(struct kasan_alloc_meta);
+       /*
+        * SLAB_KASAN is used to mark caches as ones that are sanitized by
+        * KASAN. Currently this flag is used in two places:
+        * 1. In slab_ksize() when calculating the size of the accessible
+        *    memory within the object.
+        * 2. In slab_common.c to prevent merging of sanitized caches.
+        */
+       *flags |= SLAB_KASAN;
 
-       /* Add free meta. */
-       if (IS_ENABLED(CONFIG_KASAN_GENERIC) &&
-           (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor ||
-            cache->object_size < sizeof(struct kasan_free_meta))) {
-               cache->kasan_info.free_meta_offset = *size;
-               *size += sizeof(struct kasan_free_meta);
-       }
+       if (!kasan_stack_collection_enabled())
+               return;
 
-       redzone_size = optimal_redzone(cache->object_size);
-       redzone_adjust = redzone_size - (*size - cache->object_size);
-       if (redzone_adjust > 0)
-               *size += redzone_adjust;
+       ok_size = *size;
 
-       *size = min_t(unsigned int, KMALLOC_MAX_SIZE,
-                       max(*size, cache->object_size + redzone_size));
+       /* Add alloc meta into redzone. */
+       cache->kasan_info.alloc_meta_offset = *size;
+       *size += sizeof(struct kasan_alloc_meta);
 
        /*
-        * If the metadata doesn't fit, don't enable KASAN at all.
+        * If alloc meta doesn't fit, don't add it.
+        * This can only happen with SLAB, as it has KMALLOC_MAX_SIZE equal
+        * to KMALLOC_MAX_CACHE_SIZE and doesn't fall back to page_alloc for
+        * larger sizes.
         */
-       if (*size <= cache->kasan_info.alloc_meta_offset ||
-                       *size <= cache->kasan_info.free_meta_offset) {
+       if (*size > KMALLOC_MAX_SIZE) {
                cache->kasan_info.alloc_meta_offset = 0;
-               cache->kasan_info.free_meta_offset = 0;
-               *size = orig_size;
+               *size = ok_size;
+               /* Continue, since free meta might still fit. */
+       }
+
+       /* Only the generic mode uses free meta or flexible redzones. */
+       if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
+               cache->kasan_info.free_meta_offset = KASAN_NO_FREE_META;
                return;
        }
 
-       *flags |= SLAB_KASAN;
+       /*
+        * Add free meta into redzone when it's not possible to store
+        * it in the object. This is the case when:
+        * 1. Object is SLAB_TYPESAFE_BY_RCU, which means that it can
+        *    be touched after it was freed, or
+        * 2. Object has a constructor, which means it's expected to
+        *    retain its content until the next allocation, or
+        * 3. Object is too small.
+        * Otherwise cache->kasan_info.free_meta_offset = 0 is implied.
+        */
+       if ((cache->flags & SLAB_TYPESAFE_BY_RCU) || cache->ctor ||
+           cache->object_size < sizeof(struct kasan_free_meta)) {
+               ok_size = *size;
+
+               cache->kasan_info.free_meta_offset = *size;
+               *size += sizeof(struct kasan_free_meta);
+
+               /* If free meta doesn't fit, don't add it. */
+               if (*size > KMALLOC_MAX_SIZE) {
+                       cache->kasan_info.free_meta_offset = KASAN_NO_FREE_META;
+                       *size = ok_size;
+               }
+       }
+
+       /* Calculate size with optimal redzone. */
+       optimal_size = cache->object_size + optimal_redzone(cache->object_size);
+       /* Limit it with KMALLOC_MAX_SIZE (relevant for SLAB only). */
+       if (optimal_size > KMALLOC_MAX_SIZE)
+               optimal_size = KMALLOC_MAX_SIZE;
+       /* Use optimal size if the size with added metas is not large enough. */
+       if (*size < optimal_size)
+               *size = optimal_size;
 }
 
-size_t kasan_metadata_size(struct kmem_cache *cache)
+size_t __kasan_metadata_size(struct kmem_cache *cache)
 {
+       if (!kasan_stack_collection_enabled())
+               return 0;
        return (cache->kasan_info.alloc_meta_offset ?
                sizeof(struct kasan_alloc_meta) : 0) +
                (cache->kasan_info.free_meta_offset ?
                sizeof(struct kasan_free_meta) : 0);
 }
 
-struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
-                                       const void *object)
+struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache,
+                                             const void *object)
 {
-       return (void *)object + cache->kasan_info.alloc_meta_offset;
+       if (!cache->kasan_info.alloc_meta_offset)
+               return NULL;
+       return kasan_reset_tag(object) + cache->kasan_info.alloc_meta_offset;
 }
 
-struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
-                                     const void *object)
+#ifdef CONFIG_KASAN_GENERIC
+struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
+                                           const void *object)
 {
        BUILD_BUG_ON(sizeof(struct kasan_free_meta) > 32);
-       return (void *)object + cache->kasan_info.free_meta_offset;
+       if (cache->kasan_info.free_meta_offset == KASAN_NO_FREE_META)
+               return NULL;
+       return kasan_reset_tag(object) + cache->kasan_info.free_meta_offset;
 }
+#endif
 
-void kasan_poison_slab(struct page *page)
+void __kasan_poison_slab(struct page *page)
 {
        unsigned long i;
 
        for (i = 0; i < compound_nr(page); i++)
                page_kasan_tag_reset(page + i);
-       kasan_poison_shadow(page_address(page), page_size(page),
-                       KASAN_KMALLOC_REDZONE);
+       poison_range(page_address(page), page_size(page),
+                    KASAN_KMALLOC_REDZONE);
 }
 
-void kasan_unpoison_object_data(struct kmem_cache *cache, void *object)
+void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object)
 {
-       kasan_unpoison_shadow(object, cache->object_size);
+       unpoison_range(object, cache->object_size);
 }
 
-void kasan_poison_object_data(struct kmem_cache *cache, void *object)
+void __kasan_poison_object_data(struct kmem_cache *cache, void *object)
 {
-       kasan_poison_shadow(object,
-                       round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE),
-                       KASAN_KMALLOC_REDZONE);
+       poison_range(object, cache->object_size, KASAN_KMALLOC_REDZONE);
 }
 
 /*
@@ -322,6 +277,9 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object)
 static u8 assign_tag(struct kmem_cache *cache, const void *object,
                        bool init, bool keep_tag)
 {
+       if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+               return 0xff;
+
        /*
         * 1. When an object is kmalloc()'ed, two hooks are called:
         *    kasan_slab_alloc() and kasan_kmalloc(). We assign the
@@ -351,50 +309,32 @@ static u8 assign_tag(struct kmem_cache *cache, const void *object,
 #endif
 }
 
-void * __must_check kasan_init_slab_obj(struct kmem_cache *cache,
+void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
                                                const void *object)
 {
-       struct kasan_alloc_meta *alloc_info;
+       struct kasan_alloc_meta *alloc_meta;
 
-       if (!(cache->flags & SLAB_KASAN))
-               return (void *)object;
-
-       alloc_info = get_alloc_info(cache, object);
-       __memset(alloc_info, 0, sizeof(*alloc_info));
+       if (kasan_stack_collection_enabled()) {
+               alloc_meta = kasan_get_alloc_meta(cache, object);
+               if (alloc_meta)
+                       __memset(alloc_meta, 0, sizeof(*alloc_meta));
+       }
 
-       if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
-               object = set_tag(object,
-                               assign_tag(cache, object, true, false));
+       /* Tag is ignored in set_tag() without CONFIG_KASAN_SW/HW_TAGS */
+       object = set_tag(object, assign_tag(cache, object, true, false));
 
        return (void *)object;
 }
 
-static inline bool shadow_invalid(u8 tag, s8 shadow_byte)
-{
-       if (IS_ENABLED(CONFIG_KASAN_GENERIC))
-               return shadow_byte < 0 ||
-                       shadow_byte >= KASAN_SHADOW_SCALE_SIZE;
-
-       /* else CONFIG_KASAN_SW_TAGS: */
-       if ((u8)shadow_byte == KASAN_TAG_INVALID)
-               return true;
-       if ((tag != KASAN_TAG_KERNEL) && (tag != (u8)shadow_byte))
-               return true;
-
-       return false;
-}
-
-static bool __kasan_slab_free(struct kmem_cache *cache, void *object,
+static bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
                              unsigned long ip, bool quarantine)
 {
-       s8 shadow_byte;
        u8 tag;
        void *tagged_object;
-       unsigned long rounded_up_size;
 
        tag = get_tag(object);
        tagged_object = object;
-       object = reset_tag(object);
+       object = kasan_reset_tag(object);
 
        if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) !=
            object)) {
@@ -406,37 +346,67 @@ static bool __kasan_slab_free(struct kmem_cache *cache, void *object,
        if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
                return false;
 
-       shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object));
-       if (shadow_invalid(tag, shadow_byte)) {
+       if (check_invalid_free(tagged_object)) {
                kasan_report_invalid_free(tagged_object, ip);
                return true;
        }
 
-       rounded_up_size = round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE);
-       kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
+       poison_range(object, cache->object_size, KASAN_KMALLOC_FREE);
 
-       if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine) ||
-                       unlikely(!(cache->flags & SLAB_KASAN)))
+       if (!kasan_stack_collection_enabled())
+               return false;
+
+       if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine))
                return false;
 
        kasan_set_free_info(cache, object, tag);
 
-       quarantine_put(get_free_info(cache, object), cache);
+       return quarantine_put(cache, object);
+}
 
-       return IS_ENABLED(CONFIG_KASAN_GENERIC);
+bool __kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip)
+{
+       return ____kasan_slab_free(cache, object, ip, true);
 }
 
-bool kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip)
+void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
 {
-       return __kasan_slab_free(cache, object, ip, true);
+       struct page *page;
+
+       page = virt_to_head_page(ptr);
+
+       /*
+        * Even though this function is only called for kmem_cache_alloc- and
+        * kmalloc-backed mempool allocations, those allocations can still be
+        * !PageSlab() when the size provided to kmalloc is larger than
+        * KMALLOC_MAX_SIZE, and kmalloc falls back onto page_alloc.
+        */
+       if (unlikely(!PageSlab(page))) {
+               if (ptr != page_address(page)) {
+                       kasan_report_invalid_free(ptr, ip);
+                       return;
+               }
+               poison_range(ptr, page_size(page), KASAN_FREE_PAGE);
+       } else {
+               ____kasan_slab_free(page->slab_cache, ptr, ip, false);
+       }
+}
+
+static void set_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
+{
+       struct kasan_alloc_meta *alloc_meta;
+
+       alloc_meta = kasan_get_alloc_meta(cache, object);
+       if (alloc_meta)
+               kasan_set_track(&alloc_meta->alloc_track, flags);
 }
 
-static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
+static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object,
                                size_t size, gfp_t flags, bool keep_tag)
 {
        unsigned long redzone_start;
        unsigned long redzone_end;
-       u8 tag = 0xff;
+       u8 tag;
 
        if (gfpflags_allow_blocking(flags))
                quarantine_reduce();
@@ -445,38 +415,36 @@ static void *__kasan_kmalloc(struct kmem_cache *cache, const void *object,
                return NULL;
 
        redzone_start = round_up((unsigned long)(object + size),
-                               KASAN_SHADOW_SCALE_SIZE);
+                               KASAN_GRANULE_SIZE);
        redzone_end = round_up((unsigned long)object + cache->object_size,
-                               KASAN_SHADOW_SCALE_SIZE);
-
-       if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
-               tag = assign_tag(cache, object, false, keep_tag);
+                               KASAN_GRANULE_SIZE);
+       tag = assign_tag(cache, object, false, keep_tag);
 
-       /* Tag is ignored in set_tag without CONFIG_KASAN_SW_TAGS */
-       kasan_unpoison_shadow(set_tag(object, tag), size);
-       kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
-               KASAN_KMALLOC_REDZONE);
+       /* Tag is ignored in set_tag without CONFIG_KASAN_SW/HW_TAGS */
+       unpoison_range(set_tag(object, tag), size);
+       poison_range((void *)redzone_start, redzone_end - redzone_start,
+                    KASAN_KMALLOC_REDZONE);
 
-       if (cache->flags & SLAB_KASAN)
-               kasan_set_track(&get_alloc_info(cache, object)->alloc_track, flags);
+       if (kasan_stack_collection_enabled())
+               set_alloc_info(cache, (void *)object, flags);
 
        return set_tag(object, tag);
 }
 
-void * __must_check kasan_slab_alloc(struct kmem_cache *cache, void *object,
-                                       gfp_t flags)
+void * __must_check __kasan_slab_alloc(struct kmem_cache *cache,
+                                       void *object, gfp_t flags)
 {
-       return __kasan_kmalloc(cache, object, cache->object_size, flags, false);
+       return ____kasan_kmalloc(cache, object, cache->object_size, flags, false);
 }
 
-void * __must_check kasan_kmalloc(struct kmem_cache *cache, const void *object,
-                               size_t size, gfp_t flags)
+void * __must_check __kasan_kmalloc(struct kmem_cache *cache, const void *object,
+                                       size_t size, gfp_t flags)
 {
-       return __kasan_kmalloc(cache, object, size, flags, true);
+       return ____kasan_kmalloc(cache, object, size, flags, true);
 }
-EXPORT_SYMBOL(kasan_kmalloc);
+EXPORT_SYMBOL(__kasan_kmalloc);
 
-void * __must_check kasan_kmalloc_large(const void *ptr, size_t size,
+void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size,
                                                gfp_t flags)
 {
        struct page *page;
@@ -491,17 +459,17 @@ void * __must_check kasan_kmalloc_large(const void *ptr, size_t size,
 
        page = virt_to_page(ptr);
        redzone_start = round_up((unsigned long)(ptr + size),
-                               KASAN_SHADOW_SCALE_SIZE);
+                               KASAN_GRANULE_SIZE);
        redzone_end = (unsigned long)ptr + page_size(page);
 
-       kasan_unpoison_shadow(ptr, size);
-       kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
-               KASAN_PAGE_REDZONE);
+       unpoison_range(ptr, size);
+       poison_range((void *)redzone_start, redzone_end - redzone_start,
+                    KASAN_PAGE_REDZONE);
 
        return (void *)ptr;
 }
 
-void * __must_check kasan_krealloc(const void *object, size_t size, gfp_t flags)
+void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flags)
 {
        struct page *page;
 
@@ -511,421 +479,15 @@ void * __must_check kasan_krealloc(const void *object, size_t size, gfp_t flags)
        page = virt_to_head_page(object);
 
        if (unlikely(!PageSlab(page)))
-               return kasan_kmalloc_large(object, size, flags);
+               return __kasan_kmalloc_large(object, size, flags);
        else
-               return __kasan_kmalloc(page->slab_cache, object, size,
+               return ____kasan_kmalloc(page->slab_cache, object, size,
                                                flags, true);
 }
 
-void kasan_poison_kfree(void *ptr, unsigned long ip)
-{
-       struct page *page;
-
-       page = virt_to_head_page(ptr);
-
-       if (unlikely(!PageSlab(page))) {
-               if (ptr != page_address(page)) {
-                       kasan_report_invalid_free(ptr, ip);
-                       return;
-               }
-               kasan_poison_shadow(ptr, page_size(page), KASAN_FREE_PAGE);
-       } else {
-               __kasan_slab_free(page->slab_cache, ptr, ip, false);
-       }
-}
-
-void kasan_kfree_large(void *ptr, unsigned long ip)
+void __kasan_kfree_large(void *ptr, unsigned long ip)
 {
        if (ptr != page_address(virt_to_head_page(ptr)))
                kasan_report_invalid_free(ptr, ip);
-       /* The object will be poisoned by page_alloc. */
-}
-
-#ifndef CONFIG_KASAN_VMALLOC
-int kasan_module_alloc(void *addr, size_t size)
-{
-       void *ret;
-       size_t scaled_size;
-       size_t shadow_size;
-       unsigned long shadow_start;
-
-       shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
-       scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT;
-       shadow_size = round_up(scaled_size, PAGE_SIZE);
-
-       if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
-               return -EINVAL;
-
-       ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
-                       shadow_start + shadow_size,
-                       GFP_KERNEL,
-                       PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
-                       __builtin_return_address(0));
-
-       if (ret) {
-               __memset(ret, KASAN_SHADOW_INIT, shadow_size);
-               find_vm_area(addr)->flags |= VM_KASAN;
-               kmemleak_ignore(ret);
-               return 0;
-       }
-
-       return -ENOMEM;
-}
-
-void kasan_free_shadow(const struct vm_struct *vm)
-{
-       if (vm->flags & VM_KASAN)
-               vfree(kasan_mem_to_shadow(vm->addr));
-}
-#endif
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static bool shadow_mapped(unsigned long addr)
-{
-       pgd_t *pgd = pgd_offset_k(addr);
-       p4d_t *p4d;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
-
-       if (pgd_none(*pgd))
-               return false;
-       p4d = p4d_offset(pgd, addr);
-       if (p4d_none(*p4d))
-               return false;
-       pud = pud_offset(p4d, addr);
-       if (pud_none(*pud))
-               return false;
-
-       /*
-        * We can't use pud_large() or pud_huge(), the first one is
-        * arch-specific, the last one depends on HUGETLB_PAGE.  So let's abuse
-        * pud_bad(), if pud is bad then it's bad because it's huge.
-        */
-       if (pud_bad(*pud))
-               return true;
-       pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd))
-               return false;
-
-       if (pmd_bad(*pmd))
-               return true;
-       pte = pte_offset_kernel(pmd, addr);
-       return !pte_none(*pte);
-}
-
-static int __meminit kasan_mem_notifier(struct notifier_block *nb,
-                       unsigned long action, void *data)
-{
-       struct memory_notify *mem_data = data;
-       unsigned long nr_shadow_pages, start_kaddr, shadow_start;
-       unsigned long shadow_end, shadow_size;
-
-       nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT;
-       start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn);
-       shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr);
-       shadow_size = nr_shadow_pages << PAGE_SHIFT;
-       shadow_end = shadow_start + shadow_size;
-
-       if (WARN_ON(mem_data->nr_pages % KASAN_SHADOW_SCALE_SIZE) ||
-               WARN_ON(start_kaddr % (KASAN_SHADOW_SCALE_SIZE << PAGE_SHIFT)))
-               return NOTIFY_BAD;
-
-       switch (action) {
-       case MEM_GOING_ONLINE: {
-               void *ret;
-
-               /*
-                * If shadow is mapped already than it must have been mapped
-                * during the boot. This could happen if we onlining previously
-                * offlined memory.
-                */
-               if (shadow_mapped(shadow_start))
-                       return NOTIFY_OK;
-
-               ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
-                                       shadow_end, GFP_KERNEL,
-                                       PAGE_KERNEL, VM_NO_GUARD,
-                                       pfn_to_nid(mem_data->start_pfn),
-                                       __builtin_return_address(0));
-               if (!ret)
-                       return NOTIFY_BAD;
-
-               kmemleak_ignore(ret);
-               return NOTIFY_OK;
-       }
-       case MEM_CANCEL_ONLINE:
-       case MEM_OFFLINE: {
-               struct vm_struct *vm;
-
-               /*
-                * shadow_start was either mapped during boot by kasan_init()
-                * or during memory online by __vmalloc_node_range().
-                * In the latter case we can use vfree() to free shadow.
-                * Non-NULL result of the find_vm_area() will tell us if
-                * that was the second case.
-                *
-                * Currently it's not possible to free shadow mapped
-                * during boot by kasan_init(). It's because the code
-                * to do that hasn't been written yet. So we'll just
-                * leak the memory.
-                */
-               vm = find_vm_area((void *)shadow_start);
-               if (vm)
-                       vfree((void *)shadow_start);
-       }
-       }
-
-       return NOTIFY_OK;
-}
-
-static int __init kasan_memhotplug_init(void)
-{
-       hotplug_memory_notifier(kasan_mem_notifier, 0);
-
-       return 0;
-}
-
-core_initcall(kasan_memhotplug_init);
-#endif
-
-#ifdef CONFIG_KASAN_VMALLOC
-static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
-                                     void *unused)
-{
-       unsigned long page;
-       pte_t pte;
-
-       if (likely(!pte_none(*ptep)))
-               return 0;
-
-       page = __get_free_page(GFP_KERNEL);
-       if (!page)
-               return -ENOMEM;
-
-       memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
-       pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);
-
-       spin_lock(&init_mm.page_table_lock);
-       if (likely(pte_none(*ptep))) {
-               set_pte_at(&init_mm, addr, ptep, pte);
-               page = 0;
-       }
-       spin_unlock(&init_mm.page_table_lock);
-       if (page)
-               free_page(page);
-       return 0;
-}
-
-int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
-{
-       unsigned long shadow_start, shadow_end;
-       int ret;
-
-       if (!is_vmalloc_or_module_addr((void *)addr))
-               return 0;
-
-       shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
-       shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
-       shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
-       shadow_end = ALIGN(shadow_end, PAGE_SIZE);
-
-       ret = apply_to_page_range(&init_mm, shadow_start,
-                                 shadow_end - shadow_start,
-                                 kasan_populate_vmalloc_pte, NULL);
-       if (ret)
-               return ret;
-
-       flush_cache_vmap(shadow_start, shadow_end);
-
-       /*
-        * We need to be careful about inter-cpu effects here. Consider:
-        *
-        *   CPU#0                                CPU#1
-        * WRITE_ONCE(p, vmalloc(100));         while (x = READ_ONCE(p)) ;
-        *                                      p[99] = 1;
-        *
-        * With compiler instrumentation, that ends up looking like this:
-        *
-        *   CPU#0                                CPU#1
-        * // vmalloc() allocates memory
-        * // let a = area->addr
-        * // we reach kasan_populate_vmalloc
-        * // and call kasan_unpoison_shadow:
-        * STORE shadow(a), unpoison_val
-        * ...
-        * STORE shadow(a+99), unpoison_val     x = LOAD p
-        * // rest of vmalloc process           <data dependency>
-        * STORE p, a                           LOAD shadow(x+99)
-        *
-        * If there is no barrier between the end of unpoisioning the shadow
-        * and the store of the result to p, the stores could be committed
-        * in a different order by CPU#0, and CPU#1 could erroneously observe
-        * poison in the shadow.
-        *
-        * We need some sort of barrier between the stores.
-        *
-        * In the vmalloc() case, this is provided by a smp_wmb() in
-        * clear_vm_uninitialized_flag(). In the per-cpu allocator and in
-        * get_vm_area() and friends, the caller gets shadow allocated but
-        * doesn't have any pages mapped into the virtual address space that
-        * has been reserved. Mapping those pages in will involve taking and
-        * releasing a page-table lock, which will provide the barrier.
-        */
-
-       return 0;
-}
-
-/*
- * Poison the shadow for a vmalloc region. Called as part of the
- * freeing process at the time the region is freed.
- */
-void kasan_poison_vmalloc(const void *start, unsigned long size)
-{
-       if (!is_vmalloc_or_module_addr(start))
-               return;
-
-       size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
-       kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID);
-}
-
-void kasan_unpoison_vmalloc(const void *start, unsigned long size)
-{
-       if (!is_vmalloc_or_module_addr(start))
-               return;
-
-       kasan_unpoison_shadow(start, size);
+       /* The object will be poisoned by kasan_free_pages(). */
 }
-
-static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
-                                       void *unused)
-{
-       unsigned long page;
-
-       page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);
-
-       spin_lock(&init_mm.page_table_lock);
-
-       if (likely(!pte_none(*ptep))) {
-               pte_clear(&init_mm, addr, ptep);
-               free_page(page);
-       }
-       spin_unlock(&init_mm.page_table_lock);
-
-       return 0;
-}
-
-/*
- * Release the backing for the vmalloc region [start, end), which
- * lies within the free region [free_region_start, free_region_end).
- *
- * This can be run lazily, long after the region was freed. It runs
- * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
- * infrastructure.
- *
- * How does this work?
- * -------------------
- *
- * We have a region that is page aligned, labelled as A.
- * That might not map onto the shadow in a way that is page-aligned:
- *
- *                    start                     end
- *                    v                         v
- * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc
- *  -------- -------- --------          -------- --------
- *      |        |       |                 |        |
- *      |        |       |         /-------/        |
- *      \-------\|/------/         |/---------------/
- *              |||                ||
- *             |??AAAAAA|AAAAAAAA|AA??????|                < shadow
- *                 (1)      (2)      (3)
- *
- * First we align the start upwards and the end downwards, so that the
- * shadow of the region aligns with shadow page boundaries. In the
- * example, this gives us the shadow page (2). This is the shadow entirely
- * covered by this allocation.
- *
- * Then we have the tricky bits. We want to know if we can free the
- * partially covered shadow pages - (1) and (3) in the example. For this,
- * we are given the start and end of the free region that contains this
- * allocation. Extending our previous example, we could have:
- *
- *  free_region_start                                    free_region_end
- *  |                 start                     end      |
- *  v                 v                         v        v
- * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc
- *  -------- -------- --------          -------- --------
- *      |        |       |                 |        |
- *      |        |       |         /-------/        |
- *      \-------\|/------/         |/---------------/
- *              |||                ||
- *             |FFAAAAAA|AAAAAAAA|AAF?????|                < shadow
- *                 (1)      (2)      (3)
- *
- * Once again, we align the start of the free region up, and the end of
- * the free region down so that the shadow is page aligned. So we can free
- * page (1) - we know no allocation currently uses anything in that page,
- * because all of it is in the vmalloc free region. But we cannot free
- * page (3), because we can't be sure that the rest of it is unused.
- *
- * We only consider pages that contain part of the original region for
- * freeing: we don't try to free other pages from the free region or we'd
- * end up trying to free huge chunks of virtual address space.
- *
- * Concurrency
- * -----------
- *
- * How do we know that we're not freeing a page that is simultaneously
- * being used for a fresh allocation in kasan_populate_vmalloc(_pte)?
- *
- * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running
- * at the same time. While we run under free_vmap_area_lock, the population
- * code does not.
- *
- * free_vmap_area_lock instead operates to ensure that the larger range
- * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and
- * the per-cpu region-finding algorithm both run under free_vmap_area_lock,
- * no space identified as free will become used while we are running. This
- * means that so long as we are careful with alignment and only free shadow
- * pages entirely covered by the free region, we will not run in to any
- * trouble - any simultaneous allocations will be for disjoint regions.
- */
-void kasan_release_vmalloc(unsigned long start, unsigned long end,
-                          unsigned long free_region_start,
-                          unsigned long free_region_end)
-{
-       void *shadow_start, *shadow_end;
-       unsigned long region_start, region_end;
-       unsigned long size;
-
-       region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
-       region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
-
-       free_region_start = ALIGN(free_region_start,
-                                 PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
-
-       if (start != region_start &&
-           free_region_start < region_start)
-               region_start -= PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;
-
-       free_region_end = ALIGN_DOWN(free_region_end,
-                                    PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
-
-       if (end != region_end &&
-           free_region_end > region_end)
-               region_end += PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE;
-
-       shadow_start = kasan_mem_to_shadow((void *)region_start);
-       shadow_end = kasan_mem_to_shadow((void *)region_end);
-
-       if (shadow_end > shadow_start) {
-               size = shadow_end - shadow_start;
-               apply_to_existing_page_range(&init_mm,
-                                            (unsigned long)shadow_start,
-                                            size, kasan_depopulate_vmalloc_pte,
-                                            NULL);
-               flush_tlb_kernel_range((unsigned long)shadow_start,
-                                      (unsigned long)shadow_end);
-       }
-}
-#endif
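
To make the layout rules in __kasan_cache_create() above concrete, a worked example with illustrative numbers (generic mode assumed):

/*
 * Worked example for __kasan_cache_create(), generic mode:
 * object_size = 128, no constructor, not SLAB_TYPESAFE_BY_RCU.
 *  - alloc meta goes into the redzone at the old *size;
 *  - 128 >= sizeof(struct kasan_free_meta), so free meta is stored
 *    inside the freed object and free_meta_offset stays 0 (implied);
 *  - optimal_redzone(128) returns 64 (first matching row is
 *    128 <= 256 - 64), so the final size is at least 128 + 64 = 192.
 */
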
index 30c0a50..1dd5a0f 100644 (file)
@@ -7,15 +7,8 @@
  *
  * Some code borrowed from https://github.com/xairy/kasan-prototype by
  *        Andrey Konovalov <andreyknvl@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
 #include <linux/export.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
@@ -51,7 +44,7 @@ static __always_inline bool memory_is_poisoned_1(unsigned long addr)
        s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr);
 
        if (unlikely(shadow_value)) {
-               s8 last_accessible_byte = addr & KASAN_SHADOW_MASK;
+               s8 last_accessible_byte = addr & KASAN_GRANULE_MASK;
                return unlikely(last_accessible_byte >= shadow_value);
        }
 
@@ -67,7 +60,7 @@ static __always_inline bool memory_is_poisoned_2_4_8(unsigned long addr,
         * Access crosses 8(shadow size)-byte boundary. Such access maps
         * into 2 shadow bytes, so we need to check them both.
         */
-       if (unlikely(((addr + size - 1) & KASAN_SHADOW_MASK) < size - 1))
+       if (unlikely(((addr + size - 1) & KASAN_GRANULE_MASK) < size - 1))
                return *shadow_addr || memory_is_poisoned_1(addr + size - 1);
 
        return memory_is_poisoned_1(addr + size - 1);
@@ -78,7 +71,7 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr)
        u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
 
        /* Unaligned 16-bytes access maps into 3 shadow bytes. */
-       if (unlikely(!IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
+       if (unlikely(!IS_ALIGNED(addr, KASAN_GRANULE_SIZE)))
                return *shadow_addr || memory_is_poisoned_1(addr + 15);
 
        return *shadow_addr;
@@ -139,7 +132,7 @@ static __always_inline bool memory_is_poisoned_n(unsigned long addr,
                s8 *last_shadow = (s8 *)kasan_mem_to_shadow((void *)last_byte);
 
                if (unlikely(ret != (unsigned long)last_shadow ||
-                       ((long)(last_byte & KASAN_SHADOW_MASK) >= *last_shadow)))
+                       ((long)(last_byte & KASAN_GRANULE_MASK) >= *last_shadow)))
                        return true;
        }
        return false;
@@ -192,6 +185,13 @@ bool check_memory_region(unsigned long addr, size_t size, bool write,
        return check_memory_region_inline(addr, size, write, ret_ip);
 }
 
+bool check_invalid_free(void *addr)
+{
+       s8 shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr));
+
+       return shadow_byte < 0 || shadow_byte >= KASAN_GRANULE_SIZE;
+}
+
 void kasan_cache_shrink(struct kmem_cache *cache)
 {
        quarantine_remove_cache(cache);
@@ -205,13 +205,13 @@ void kasan_cache_shutdown(struct kmem_cache *cache)
 
 static void register_global(struct kasan_global *global)
 {
-       size_t aligned_size = round_up(global->size, KASAN_SHADOW_SCALE_SIZE);
+       size_t aligned_size = round_up(global->size, KASAN_GRANULE_SIZE);
 
-       kasan_unpoison_shadow(global->beg, global->size);
+       unpoison_range(global->beg, global->size);
 
-       kasan_poison_shadow(global->beg + aligned_size,
-               global->size_with_redzone - aligned_size,
-               KASAN_GLOBAL_REDZONE);
+       poison_range(global->beg + aligned_size,
+                    global->size_with_redzone - aligned_size,
+                    KASAN_GLOBAL_REDZONE);
 }
 
 void __asan_register_globals(struct kasan_global *globals, size_t size)
@@ -279,10 +279,10 @@ EXPORT_SYMBOL(__asan_handle_no_return);
 /* Emitted by compiler to poison alloca()ed objects. */
 void __asan_alloca_poison(unsigned long addr, size_t size)
 {
-       size_t rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
+       size_t rounded_up_size = round_up(size, KASAN_GRANULE_SIZE);
        size_t padding_size = round_up(size, KASAN_ALLOCA_REDZONE_SIZE) -
                        rounded_up_size;
-       size_t rounded_down_size = round_down(size, KASAN_SHADOW_SCALE_SIZE);
+       size_t rounded_down_size = round_down(size, KASAN_GRANULE_SIZE);
 
        const void *left_redzone = (const void *)(addr -
                        KASAN_ALLOCA_REDZONE_SIZE);
@@ -290,13 +290,12 @@ void __asan_alloca_poison(unsigned long addr, size_t size)
 
        WARN_ON(!IS_ALIGNED(addr, KASAN_ALLOCA_REDZONE_SIZE));
 
-       kasan_unpoison_shadow((const void *)(addr + rounded_down_size),
-                             size - rounded_down_size);
-       kasan_poison_shadow(left_redzone, KASAN_ALLOCA_REDZONE_SIZE,
-                       KASAN_ALLOCA_LEFT);
-       kasan_poison_shadow(right_redzone,
-                       padding_size + KASAN_ALLOCA_REDZONE_SIZE,
-                       KASAN_ALLOCA_RIGHT);
+       unpoison_range((const void *)(addr + rounded_down_size),
+                      size - rounded_down_size);
+       poison_range(left_redzone, KASAN_ALLOCA_REDZONE_SIZE,
+                    KASAN_ALLOCA_LEFT);
+       poison_range(right_redzone, padding_size + KASAN_ALLOCA_REDZONE_SIZE,
+                    KASAN_ALLOCA_RIGHT);
 }
 EXPORT_SYMBOL(__asan_alloca_poison);
 
@@ -306,7 +305,7 @@ void __asan_allocas_unpoison(const void *stack_top, const void *stack_bottom)
        if (unlikely(!stack_top || stack_top > stack_bottom))
                return;
 
-       kasan_unpoison_shadow(stack_top, stack_bottom - stack_top);
+       unpoison_range(stack_top, stack_bottom - stack_top);
 }
 EXPORT_SYMBOL(__asan_allocas_unpoison);
 
@@ -329,7 +328,7 @@ void kasan_record_aux_stack(void *addr)
 {
        struct page *page = kasan_addr_to_page(addr);
        struct kmem_cache *cache;
-       struct kasan_alloc_meta *alloc_info;
+       struct kasan_alloc_meta *alloc_meta;
        void *object;
 
        if (!(page && PageSlab(page)))
@@ -337,10 +336,10 @@ void kasan_record_aux_stack(void *addr)
 
        cache = page->slab_cache;
        object = nearest_obj(cache, page, addr);
-       alloc_info = get_alloc_info(cache, object);
+       alloc_meta = kasan_get_alloc_meta(cache, object);
 
-       alloc_info->aux_stack[1] = alloc_info->aux_stack[0];
-       alloc_info->aux_stack[0] = kasan_save_stack(GFP_NOWAIT);
+       alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0];
+       alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT);
 }
 
 void kasan_set_free_info(struct kmem_cache *cache,
@@ -348,12 +347,12 @@ void kasan_set_free_info(struct kmem_cache *cache,
 {
        struct kasan_free_meta *free_meta;
 
-       free_meta = get_free_info(cache, object);
-       kasan_set_track(&free_meta->free_track, GFP_NOWAIT);
+       free_meta = kasan_get_free_meta(cache, object);
+       if (!free_meta)
+               return;
 
-       /*
-        *  the object was freed and has free track set
-        */
+       kasan_set_track(&free_meta->free_track, GFP_NOWAIT);
+       /* The object was freed and has free track set. */
        *(u8 *)kasan_mem_to_shadow(object) = KASAN_KMALLOC_FREETRACK;
 }
 
@@ -362,5 +361,6 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
 {
        if (*(u8 *)kasan_mem_to_shadow(object) != KASAN_KMALLOC_FREETRACK)
                return NULL;
-       return &get_free_info(cache, object)->free_track;
+       /* Free meta must be present with KASAN_KMALLOC_FREETRACK. */
+       return &kasan_get_free_meta(cache, object)->free_track;
 }
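
A recap of the generic-mode shadow encoding that check_invalid_free() and the memory_is_poisoned_*() helpers above rely on; one shadow byte tracks one KASAN_GRANULE_SIZE (8-byte) granule:

/*
 * Shadow byte values, generic mode:
 *   0      the whole granule is accessible
 *   1..7   only the first N bytes of the granule are accessible
 *   < 0    the granule is fully poisoned; the value encodes why
 *          (KASAN_FREE_PAGE, KASAN_KMALLOC_REDZONE, ...)
 * check_invalid_free() therefore reports a free whose first shadow
 * byte is negative or outside the 0..KASAN_GRANULE_SIZE-1 range.
 */
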
diff --git a/mm/kasan/generic_report.c b/mm/kasan/generic_report.c
deleted file mode 100644 (file)
index a38c7a9..0000000
+++ /dev/null
@@ -1,165 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This file contains generic KASAN specific error reporting code.
- *
- * Copyright (c) 2014 Samsung Electronics Co., Ltd.
- * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
- *
- * Some code borrowed from https://github.com/xairy/kasan-prototype by
- *        Andrey Konovalov <andreyknvl@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/bitops.h>
-#include <linux/ftrace.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/printk.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/stackdepot.h>
-#include <linux/stacktrace.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/kasan.h>
-#include <linux/module.h>
-
-#include <asm/sections.h>
-
-#include "kasan.h"
-#include "../slab.h"
-
-void *find_first_bad_addr(void *addr, size_t size)
-{
-       void *p = addr;
-
-       while (p < addr + size && !(*(u8 *)kasan_mem_to_shadow(p)))
-               p += KASAN_SHADOW_SCALE_SIZE;
-       return p;
-}
-
-static const char *get_shadow_bug_type(struct kasan_access_info *info)
-{
-       const char *bug_type = "unknown-crash";
-       u8 *shadow_addr;
-
-       shadow_addr = (u8 *)kasan_mem_to_shadow(info->first_bad_addr);
-
-       /*
-        * If shadow byte value is in [0, KASAN_SHADOW_SCALE_SIZE) we can look
-        * at the next shadow byte to determine the type of the bad access.
-        */
-       if (*shadow_addr > 0 && *shadow_addr <= KASAN_SHADOW_SCALE_SIZE - 1)
-               shadow_addr++;
-
-       switch (*shadow_addr) {
-       case 0 ... KASAN_SHADOW_SCALE_SIZE - 1:
-               /*
-                * In theory it's still possible to see these shadow values
-                * due to a data race in the kernel code.
-                */
-               bug_type = "out-of-bounds";
-               break;
-       case KASAN_PAGE_REDZONE:
-       case KASAN_KMALLOC_REDZONE:
-               bug_type = "slab-out-of-bounds";
-               break;
-       case KASAN_GLOBAL_REDZONE:
-               bug_type = "global-out-of-bounds";
-               break;
-       case KASAN_STACK_LEFT:
-       case KASAN_STACK_MID:
-       case KASAN_STACK_RIGHT:
-       case KASAN_STACK_PARTIAL:
-               bug_type = "stack-out-of-bounds";
-               break;
-       case KASAN_FREE_PAGE:
-       case KASAN_KMALLOC_FREE:
-       case KASAN_KMALLOC_FREETRACK:
-               bug_type = "use-after-free";
-               break;
-       case KASAN_ALLOCA_LEFT:
-       case KASAN_ALLOCA_RIGHT:
-               bug_type = "alloca-out-of-bounds";
-               break;
-       case KASAN_VMALLOC_INVALID:
-               bug_type = "vmalloc-out-of-bounds";
-               break;
-       }
-
-       return bug_type;
-}
-
-static const char *get_wild_bug_type(struct kasan_access_info *info)
-{
-       const char *bug_type = "unknown-crash";
-
-       if ((unsigned long)info->access_addr < PAGE_SIZE)
-               bug_type = "null-ptr-deref";
-       else if ((unsigned long)info->access_addr < TASK_SIZE)
-               bug_type = "user-memory-access";
-       else
-               bug_type = "wild-memory-access";
-
-       return bug_type;
-}
-
-const char *get_bug_type(struct kasan_access_info *info)
-{
-       /*
-        * If access_size is a negative number, then it has reason to be
-        * defined as out-of-bounds bug type.
-        *
-        * Casting negative numbers to size_t would indeed turn up as
-        * a large size_t and its value will be larger than ULONG_MAX/2,
-        * so that this can qualify as out-of-bounds.
-        */
-       if (info->access_addr + info->access_size < info->access_addr)
-               return "out-of-bounds";
-
-       if (addr_has_shadow(info->access_addr))
-               return get_shadow_bug_type(info);
-       return get_wild_bug_type(info);
-}
-
-#define DEFINE_ASAN_REPORT_LOAD(size)                     \
-void __asan_report_load##size##_noabort(unsigned long addr) \
-{                                                         \
-       kasan_report(addr, size, false, _RET_IP_);        \
-}                                                         \
-EXPORT_SYMBOL(__asan_report_load##size##_noabort)
-
-#define DEFINE_ASAN_REPORT_STORE(size)                     \
-void __asan_report_store##size##_noabort(unsigned long addr) \
-{                                                          \
-       kasan_report(addr, size, true, _RET_IP_);          \
-}                                                          \
-EXPORT_SYMBOL(__asan_report_store##size##_noabort)
-
-DEFINE_ASAN_REPORT_LOAD(1);
-DEFINE_ASAN_REPORT_LOAD(2);
-DEFINE_ASAN_REPORT_LOAD(4);
-DEFINE_ASAN_REPORT_LOAD(8);
-DEFINE_ASAN_REPORT_LOAD(16);
-DEFINE_ASAN_REPORT_STORE(1);
-DEFINE_ASAN_REPORT_STORE(2);
-DEFINE_ASAN_REPORT_STORE(4);
-DEFINE_ASAN_REPORT_STORE(8);
-DEFINE_ASAN_REPORT_STORE(16);
-
-void __asan_report_load_n_noabort(unsigned long addr, size_t size)
-{
-       kasan_report(addr, size, false, _RET_IP_);
-}
-EXPORT_SYMBOL(__asan_report_load_n_noabort);
-
-void __asan_report_store_n_noabort(unsigned long addr, size_t size)
-{
-       kasan_report(addr, size, true, _RET_IP_);
-}
-EXPORT_SYMBOL(__asan_report_store_n_noabort);
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
new file mode 100644 (file)
index 0000000..55bd6f0
--- /dev/null
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file contains core hardware tag-based KASAN code.
+ *
+ * Copyright (c) 2020 Google, Inc.
+ * Author: Andrey Konovalov <andreyknvl@google.com>
+ */
+
+#define pr_fmt(fmt) "kasan: " fmt
+
+#include <linux/init.h>
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memory.h>
+#include <linux/mm.h>
+#include <linux/static_key.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "kasan.h"
+
+enum kasan_arg_mode {
+       KASAN_ARG_MODE_DEFAULT,
+       KASAN_ARG_MODE_OFF,
+       KASAN_ARG_MODE_PROD,
+       KASAN_ARG_MODE_FULL,
+};
+
+enum kasan_arg_stacktrace {
+       KASAN_ARG_STACKTRACE_DEFAULT,
+       KASAN_ARG_STACKTRACE_OFF,
+       KASAN_ARG_STACKTRACE_ON,
+};
+
+enum kasan_arg_fault {
+       KASAN_ARG_FAULT_DEFAULT,
+       KASAN_ARG_FAULT_REPORT,
+       KASAN_ARG_FAULT_PANIC,
+};
+
+static enum kasan_arg_mode kasan_arg_mode __ro_after_init;
+static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init;
+static enum kasan_arg_fault kasan_arg_fault __ro_after_init;
+
+/* Whether KASAN is enabled at all. */
+DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled);
+EXPORT_SYMBOL(kasan_flag_enabled);
+
+/* Whether to collect alloc/free stack traces. */
+DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
+
+/* Whether to panic or print a report and disable tag checking on fault. */
+bool kasan_flag_panic __ro_after_init;
+
+/* kasan.mode=off/prod/full */
+static int __init early_kasan_mode(char *arg)
+{
+       if (!arg)
+               return -EINVAL;
+
+       if (!strcmp(arg, "off"))
+               kasan_arg_mode = KASAN_ARG_MODE_OFF;
+       else if (!strcmp(arg, "prod"))
+               kasan_arg_mode = KASAN_ARG_MODE_PROD;
+       else if (!strcmp(arg, "full"))
+               kasan_arg_mode = KASAN_ARG_MODE_FULL;
+       else
+               return -EINVAL;
+
+       return 0;
+}
+early_param("kasan.mode", early_kasan_mode);
+
+/* kasan.stacktrace=off/on */
+static int __init early_kasan_flag_stacktrace(char *arg)
+{
+       if (!arg)
+               return -EINVAL;
+
+       if (!strcmp(arg, "off"))
+               kasan_arg_stacktrace = KASAN_ARG_STACKTRACE_OFF;
+       else if (!strcmp(arg, "on"))
+               kasan_arg_stacktrace = KASAN_ARG_STACKTRACE_ON;
+       else
+               return -EINVAL;
+
+       return 0;
+}
+early_param("kasan.stacktrace", early_kasan_flag_stacktrace);
+
+/* kasan.fault=report/panic */
+static int __init early_kasan_fault(char *arg)
+{
+       if (!arg)
+               return -EINVAL;
+
+       if (!strcmp(arg, "report"))
+               kasan_arg_fault = KASAN_ARG_FAULT_REPORT;
+       else if (!strcmp(arg, "panic"))
+               kasan_arg_fault = KASAN_ARG_FAULT_PANIC;
+       else
+               return -EINVAL;
+
+       return 0;
+}
+early_param("kasan.fault", early_kasan_fault);
+
+/* kasan_init_hw_tags_cpu() is called for each CPU. */
+void kasan_init_hw_tags_cpu(void)
+{
+       /*
+        * There's no need to check that the hardware is MTE-capable here,
+        * as this function is only called for MTE-capable hardware.
+        */
+
+       /* If KASAN is disabled, do nothing. */
+       if (kasan_arg_mode == KASAN_ARG_MODE_OFF)
+               return;
+
+       hw_init_tags(KASAN_TAG_MAX);
+       hw_enable_tagging();
+}
+
+/* kasan_init_hw_tags() is called once on boot CPU. */
+void __init kasan_init_hw_tags(void)
+{
+       /* If hardware doesn't support MTE, do nothing. */
+       if (!system_supports_mte())
+               return;
+
+       /* Choose the KASAN mode if the kasan.mode boot parameter is not provided. */
+       if (kasan_arg_mode == KASAN_ARG_MODE_DEFAULT) {
+               if (IS_ENABLED(CONFIG_DEBUG_KERNEL))
+                       kasan_arg_mode = KASAN_ARG_MODE_FULL;
+               else
+                       kasan_arg_mode = KASAN_ARG_MODE_PROD;
+       }
+
+       /* Preset parameter values based on the mode. */
+       switch (kasan_arg_mode) {
+       case KASAN_ARG_MODE_DEFAULT:
+               /* Shouldn't happen as per the check above. */
+               WARN_ON(1);
+               return;
+       case KASAN_ARG_MODE_OFF:
+               /* If KASAN is disabled, do nothing. */
+               return;
+       case KASAN_ARG_MODE_PROD:
+               static_branch_enable(&kasan_flag_enabled);
+               break;
+       case KASAN_ARG_MODE_FULL:
+               static_branch_enable(&kasan_flag_enabled);
+               static_branch_enable(&kasan_flag_stacktrace);
+               break;
+       }
+
+       /* Now, optionally override the presets. */
+
+       switch (kasan_arg_stacktrace) {
+       case KASAN_ARG_STACKTRACE_DEFAULT:
+               break;
+       case KASAN_ARG_STACKTRACE_OFF:
+               static_branch_disable(&kasan_flag_stacktrace);
+               break;
+       case KASAN_ARG_STACKTRACE_ON:
+               static_branch_enable(&kasan_flag_stacktrace);
+               break;
+       }
+
+       switch (kasan_arg_fault) {
+       case KASAN_ARG_FAULT_DEFAULT:
+               break;
+       case KASAN_ARG_FAULT_REPORT:
+               kasan_flag_panic = false;
+               break;
+       case KASAN_ARG_FAULT_PANIC:
+               kasan_flag_panic = true;
+               break;
+       }
+
+       pr_info("KernelAddressSanitizer initialized\n");
+}
+
+void kasan_set_free_info(struct kmem_cache *cache,
+                               void *object, u8 tag)
+{
+       struct kasan_alloc_meta *alloc_meta;
+
+       alloc_meta = kasan_get_alloc_meta(cache, object);
+       if (alloc_meta)
+               kasan_set_track(&alloc_meta->free_track[0], GFP_NOWAIT);
+}
+
+struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
+                               void *object, u8 tag)
+{
+       struct kasan_alloc_meta *alloc_meta;
+
+       alloc_meta = kasan_get_alloc_meta(cache, object);
+       if (!alloc_meta)
+               return NULL;
+
+       return &alloc_meta->free_track[0];
+}
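
The three early params above compose as follows; a usage sketch (not an
exhaustive matrix, and subject to the preset-then-override order in
kasan_init_hw_tags()):

    kasan.mode=prod                         # checks on, no stack traces
    kasan.mode=full kasan.stacktrace=off    # full preset, traces forced off
    kasan.mode=prod kasan.fault=panic       # panic on the first report
    kasan.mode=off                          # tag checking never enabled
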
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index fe6be0b..bc0ad20 100644 (file)
@@ -1,14 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * This file contains some kasan initialization code.
+ * This file contains KASAN shadow initialization code.
  *
  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
  * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/memblock.h>
@@ -446,9 +441,8 @@ void kasan_remove_zero_shadow(void *start, unsigned long size)
        addr = (unsigned long)kasan_mem_to_shadow(start);
        end = addr + (size >> KASAN_SHADOW_SCALE_SHIFT);
 
-       if (WARN_ON((unsigned long)start %
-                       (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) ||
-           WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)))
+       if (WARN_ON((unsigned long)start % KASAN_MEMORY_PER_SHADOW_PAGE) ||
+           WARN_ON(size % KASAN_MEMORY_PER_SHADOW_PAGE))
                return;
 
        for (; addr < end; addr = next) {
@@ -481,9 +475,8 @@ int kasan_add_zero_shadow(void *start, unsigned long size)
        shadow_start = kasan_mem_to_shadow(start);
        shadow_end = shadow_start + (size >> KASAN_SHADOW_SCALE_SHIFT);
 
-       if (WARN_ON((unsigned long)start %
-                       (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) ||
-           WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)))
+       if (WARN_ON((unsigned long)start % KASAN_MEMORY_PER_SHADOW_PAGE) ||
+           WARN_ON(size % KASAN_MEMORY_PER_SHADOW_PAGE))
                return -EINVAL;
 
        ret = kasan_populate_early_shadow(shadow_start, shadow_end);
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index ac49945..cc4d9e1 100644 (file)
@@ -5,8 +5,32 @@
 #include <linux/kasan.h>
 #include <linux/stackdepot.h>
 
-#define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT)
-#define KASAN_SHADOW_MASK       (KASAN_SHADOW_SCALE_SIZE - 1)
+#ifdef CONFIG_KASAN_HW_TAGS
+#include <linux/static_key.h>
+DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
+static inline bool kasan_stack_collection_enabled(void)
+{
+       return static_branch_unlikely(&kasan_flag_stacktrace);
+}
+#else
+static inline bool kasan_stack_collection_enabled(void)
+{
+       return true;
+}
+#endif
+
+extern bool kasan_flag_panic __ro_after_init;
+
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+#define KASAN_GRANULE_SIZE     (1UL << KASAN_SHADOW_SCALE_SHIFT)
+#else
+#include <asm/mte-kasan.h>
+#define KASAN_GRANULE_SIZE     MTE_GRANULE_SIZE
+#endif
+
+#define KASAN_GRANULE_MASK     (KASAN_GRANULE_SIZE - 1)
+
+#define KASAN_MEMORY_PER_SHADOW_PAGE   (KASAN_GRANULE_SIZE << PAGE_SHIFT)
 
 #define KASAN_TAG_KERNEL       0xFF /* native kernel pointers tag */
 #define KASAN_TAG_INVALID      0xFE /* inaccessible memory tag */
 #define KASAN_ABI_VERSION 1
 #endif
 
+/* Metadata layout customization. */
+#define META_BYTES_PER_BLOCK 1
+#define META_BLOCKS_PER_ROW 16
+#define META_BYTES_PER_ROW (META_BLOCKS_PER_ROW * META_BYTES_PER_BLOCK)
+#define META_MEM_BYTES_PER_ROW (META_BYTES_PER_ROW * KASAN_GRANULE_SIZE)
+#define META_ROWS_AROUND_ADDR 2
+
 struct kasan_access_info {
        const void *access_addr;
        const void *first_bad_addr;
@@ -124,20 +155,33 @@ struct kasan_alloc_meta {
 struct qlist_node {
        struct qlist_node *next;
 };
+
+/*
+ * Generic mode either stores free meta in the object itself or in the redzone
+ * after the object. In the former case the free meta offset is 0; in the
+ * latter it is some sane value smaller than INT_MAX. Use INT_MAX as the
+ * free meta offset when free meta isn't present.
+ */
+#define KASAN_NO_FREE_META INT_MAX
+
 struct kasan_free_meta {
+#ifdef CONFIG_KASAN_GENERIC
        /* This field is used while the object is in the quarantine.
         * Otherwise it might be used for the allocator freelist.
         */
        struct qlist_node quarantine_link;
-#ifdef CONFIG_KASAN_GENERIC
        struct kasan_track free_track;
 #endif
 };
 
-struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache,
-                                       const void *object);
-struct kasan_free_meta *get_free_info(struct kmem_cache *cache,
-                                       const void *object);
+struct kasan_alloc_meta *kasan_get_alloc_meta(struct kmem_cache *cache,
+                                               const void *object);
+#ifdef CONFIG_KASAN_GENERIC
+struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
+                                               const void *object);
+#endif
+
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 
 static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
 {
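
kasan_get_free_meta() itself lives in mm/kasan/common.c, outside this
section; given the comment above, its shape is presumably along these lines
(a hypothetical sketch, assuming the offset is kept in
cache->kasan_info.free_meta_offset):

    struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
                                                const void *object)
    {
            if (cache->kasan_info.free_meta_offset == KASAN_NO_FREE_META)
                    return NULL;
            return (void *)object + cache->kasan_info.free_meta_offset;
    }
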
@@ -145,13 +189,11 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
                << KASAN_SHADOW_SCALE_SHIFT);
 }
 
-static inline bool addr_has_shadow(const void *addr)
+static inline bool addr_has_metadata(const void *addr)
 {
        return (addr >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
 }
 
-void kasan_poison_shadow(const void *address, size_t size, u8 value);
-
 /**
  * check_memory_region - Check memory region, and report if invalid access.
  * @addr: the accessed address
@@ -163,8 +205,30 @@ void kasan_poison_shadow(const void *address, size_t size, u8 value);
 bool check_memory_region(unsigned long addr, size_t size, bool write,
                                unsigned long ret_ip);
 
+#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
+
+static inline bool addr_has_metadata(const void *addr)
+{
+       return true;
+}
+
+#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
+
+#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
+void print_tags(u8 addr_tag, const void *addr);
+#else
+static inline void print_tags(u8 addr_tag, const void *addr) { }
+#endif
+
 void *find_first_bad_addr(void *addr, size_t size);
 const char *get_bug_type(struct kasan_access_info *info);
+void metadata_fetch_row(char *buffer, void *row);
+
+#if defined(CONFIG_KASAN_GENERIC) && CONFIG_KASAN_STACK
+void print_address_stack_frame(const void *addr);
+#else
+static inline void print_address_stack_frame(const void *addr) { }
+#endif
 
 bool kasan_report(unsigned long addr, size_t size,
                bool is_write, unsigned long ip);
@@ -180,49 +244,92 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
 
 #if defined(CONFIG_KASAN_GENERIC) && \
        (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
-void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache);
+bool quarantine_put(struct kmem_cache *cache, void *object);
 void quarantine_reduce(void);
 void quarantine_remove_cache(struct kmem_cache *cache);
 #else
-static inline void quarantine_put(struct kasan_free_meta *info,
-                               struct kmem_cache *cache) { }
+static inline bool quarantine_put(struct kmem_cache *cache, void *object) { return false; }
 static inline void quarantine_reduce(void) { }
 static inline void quarantine_remove_cache(struct kmem_cache *cache) { }
 #endif
 
-#ifdef CONFIG_KASAN_SW_TAGS
+#ifndef arch_kasan_set_tag
+static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
+{
+       return addr;
+}
+#endif
+#ifndef arch_kasan_get_tag
+#define arch_kasan_get_tag(addr)       0
+#endif
 
-void print_tags(u8 addr_tag, const void *addr);
+#define set_tag(addr, tag)     ((void *)arch_kasan_set_tag((addr), (tag)))
+#define get_tag(addr)          arch_kasan_get_tag(addr)
 
-u8 random_tag(void);
+#ifdef CONFIG_KASAN_HW_TAGS
+
+#ifndef arch_enable_tagging
+#define arch_enable_tagging()
+#endif
+#ifndef arch_init_tags
+#define arch_init_tags(max_tag)
+#endif
+#ifndef arch_get_random_tag
+#define arch_get_random_tag()  (0xFF)
+#endif
+#ifndef arch_get_mem_tag
+#define arch_get_mem_tag(addr) (0xFF)
+#endif
+#ifndef arch_set_mem_tag_range
+#define arch_set_mem_tag_range(addr, size, tag) ((void *)(addr))
+#endif
+
+#define hw_enable_tagging()                    arch_enable_tagging()
+#define hw_init_tags(max_tag)                  arch_init_tags(max_tag)
+#define hw_get_random_tag()                    arch_get_random_tag()
+#define hw_get_mem_tag(addr)                   arch_get_mem_tag(addr)
+#define hw_set_mem_tag_range(addr, size, tag)  arch_set_mem_tag_range((addr), (size), (tag))
 
+#endif /* CONFIG_KASAN_HW_TAGS */
+
+#ifdef CONFIG_KASAN_SW_TAGS
+u8 random_tag(void);
+#elif defined(CONFIG_KASAN_HW_TAGS)
+static inline u8 random_tag(void) { return hw_get_random_tag(); }
 #else
+static inline u8 random_tag(void) { return 0; }
+#endif
 
-static inline void print_tags(u8 addr_tag, const void *addr) { }
+#ifdef CONFIG_KASAN_HW_TAGS
 
-static inline u8 random_tag(void)
+static inline void poison_range(const void *address, size_t size, u8 value)
 {
-       return 0;
+       hw_set_mem_tag_range(kasan_reset_tag(address),
+                       round_up(size, KASAN_GRANULE_SIZE), value);
 }
 
-#endif
+static inline void unpoison_range(const void *address, size_t size)
+{
+       hw_set_mem_tag_range(kasan_reset_tag(address),
+                       round_up(size, KASAN_GRANULE_SIZE), get_tag(address));
+}
 
-#ifndef arch_kasan_set_tag
-static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
+static inline bool check_invalid_free(void *addr)
 {
-       return addr;
+       u8 ptr_tag = get_tag(addr);
+       u8 mem_tag = hw_get_mem_tag(addr);
+
+       return (mem_tag == KASAN_TAG_INVALID) ||
+               (ptr_tag != KASAN_TAG_KERNEL && ptr_tag != mem_tag);
 }
-#endif
-#ifndef arch_kasan_reset_tag
-#define arch_kasan_reset_tag(addr)     ((void *)(addr))
-#endif
-#ifndef arch_kasan_get_tag
-#define arch_kasan_get_tag(addr)       0
-#endif
 
-#define set_tag(addr, tag)     ((void *)arch_kasan_set_tag((addr), (tag)))
-#define reset_tag(addr)                ((void *)arch_kasan_reset_tag(addr))
-#define get_tag(addr)          arch_kasan_get_tag(addr)
+#else /* CONFIG_KASAN_HW_TAGS */
+
+void poison_range(const void *address, size_t size, u8 value);
+void unpoison_range(const void *address, size_t size);
+bool check_invalid_free(void *addr);
+
+#endif /* CONFIG_KASAN_HW_TAGS */
 
 /*
  * Exported functions for interfaces called from assembly or from generated
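
Note the round_up() in the two CONFIG_KASAN_HW_TAGS helpers above: memory
tags apply to whole granules only. A worked example, assuming
MTE_GRANULE_SIZE == 16:

    unpoison_range(p, 40);
    /* round_up(40, 16) == 48: three full 16-byte granules receive the
     * pointer's tag, so the 8 slack bytes past the object also become
     * accessible; hardware tags have no partial-granule encoding. */
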
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 0e3f849..5578312 100644 (file)
@@ -6,16 +6,6 @@
  * Copyright (C) 2016 Google, Inc.
  *
  * Based on code by Dmitry Chernenkov.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
  */
 
 #include <linux/gfp.h>
@@ -147,7 +137,12 @@ static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache)
        if (IS_ENABLED(CONFIG_SLAB))
                local_irq_save(flags);
 
+       /*
+        * As the object now gets freed from the quarantine, assume that its
+        * free track is no longer valid.
+        */
        *(u8 *)kasan_mem_to_shadow(object) = KASAN_KMALLOC_FREE;
+
        ___cache_free(cache, object, _THIS_IP_);
 
        if (IS_ENABLED(CONFIG_SLAB))
@@ -173,11 +168,19 @@ static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache)
        qlist_init(q);
 }
 
-void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
+bool quarantine_put(struct kmem_cache *cache, void *object)
 {
        unsigned long flags;
        struct qlist_head *q;
        struct qlist_head temp = QLIST_INIT;
+       struct kasan_free_meta *meta = kasan_get_free_meta(cache, object);
+
+       /*
+        * If there's no metadata for this object, don't put it into
+        * quarantine.
+        */
+       if (!meta)
+               return false;
 
        /*
         * Note: irq must be disabled until after we move the batch to the
@@ -192,9 +195,9 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
        q = this_cpu_ptr(&cpu_quarantine);
        if (q->offline) {
                local_irq_restore(flags);
-               return;
+               return false;
        }
-       qlist_put(q, &info->quarantine_link, cache->size);
+       qlist_put(q, &meta->quarantine_link, cache->size);
        if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
                qlist_move_all(q, &temp);
 
@@ -215,6 +218,8 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
        }
 
        local_irq_restore(flags);
+
+       return true;
 }
 
 void quarantine_reduce(void)
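
With quarantine_put() now reporting whether the object was actually queued,
the slab-free path can fall back to an immediate free for objects that carry
no free meta. The call site is in mm/kasan/common.c, outside this section;
plausibly something like:

    /* hypothetical caller sketch */
    kasan_set_free_info(cache, object, tag);
    return quarantine_put(cache, object);
    /* false: the slab allocator should free the object right away */
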
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 5a0102f..c0fb217 100644 (file)
@@ -1,17 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * This file contains common generic and tag-based KASAN error reporting code.
+ * This file contains common KASAN error reporting code.
  *
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
  * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
  *
  * Some code borrowed from https://github.com/xairy/kasan-prototype by
  *        Andrey Konovalov <andreyknvl@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/bitops.h>
 #include "kasan.h"
 #include "../slab.h"
 
-/* Shadow layout customization. */
-#define SHADOW_BYTES_PER_BLOCK 1
-#define SHADOW_BLOCKS_PER_ROW 16
-#define SHADOW_BYTES_PER_ROW (SHADOW_BLOCKS_PER_ROW * SHADOW_BYTES_PER_BLOCK)
-#define SHADOW_ROWS_AROUND_ADDR 2
-
 static unsigned long kasan_flags;
 
 #define KASAN_BIT_REPORTED     0
@@ -73,9 +62,14 @@ static void print_error_description(struct kasan_access_info *info)
 {
        pr_err("BUG: KASAN: %s in %pS\n",
                get_bug_type(info), (void *)info->ip);
-       pr_err("%s of size %zu at addr %px by task %s/%d\n",
-               info->is_write ? "Write" : "Read", info->access_size,
-               info->access_addr, current->comm, task_pid_nr(current));
+       if (info->access_size)
+               pr_err("%s of size %zu at addr %px by task %s/%d\n",
+                       info->is_write ? "Write" : "Read", info->access_size,
+                       info->access_addr, current->comm, task_pid_nr(current));
+       else
+               pr_err("%s at addr %px by task %s/%d\n",
+                       info->is_write ? "Write" : "Read",
+                       info->access_addr, current->comm, task_pid_nr(current));
 }
 
 static DEFINE_SPINLOCK(report_lock);
@@ -105,6 +99,10 @@ static void end_report(unsigned long *flags)
                panic_on_warn = 0;
                panic("panic_on_warn set ...\n");
        }
+#ifdef CONFIG_KASAN_HW_TAGS
+       if (kasan_flag_panic)
+               panic("kasan.fault=panic set ...\n");
+#endif
        kasan_enable_current();
 }
 
@@ -167,36 +165,45 @@ static void describe_object_addr(struct kmem_cache *cache, void *object,
                (void *)(object_addr + cache->object_size));
 }
 
-static void describe_object(struct kmem_cache *cache, void *object,
-                               const void *addr, u8 tag)
+static void describe_object_stacks(struct kmem_cache *cache, void *object,
+                                       const void *addr, u8 tag)
 {
-       struct kasan_alloc_meta *alloc_info = get_alloc_info(cache, object);
+       struct kasan_alloc_meta *alloc_meta;
+       struct kasan_track *free_track;
 
-       if (cache->flags & SLAB_KASAN) {
-               struct kasan_track *free_track;
+       alloc_meta = kasan_get_alloc_meta(cache, object);
+       if (alloc_meta) {
+               print_track(&alloc_meta->alloc_track, "Allocated");
+               pr_err("\n");
+       }
 
-               print_track(&alloc_info->alloc_track, "Allocated");
+       free_track = kasan_get_free_track(cache, object, tag);
+       if (free_track) {
+               print_track(free_track, "Freed");
                pr_err("\n");
-               free_track = kasan_get_free_track(cache, object, tag);
-               if (free_track) {
-                       print_track(free_track, "Freed");
-                       pr_err("\n");
-               }
+       }
 
 #ifdef CONFIG_KASAN_GENERIC
-               if (alloc_info->aux_stack[0]) {
-                       pr_err("Last potentially related work creation:\n");
-                       print_stack(alloc_info->aux_stack[0]);
-                       pr_err("\n");
-               }
-               if (alloc_info->aux_stack[1]) {
-                       pr_err("Second to last potentially related work creation:\n");
-                       print_stack(alloc_info->aux_stack[1]);
-                       pr_err("\n");
-               }
-#endif
+       if (!alloc_meta)
+               return;
+       if (alloc_meta->aux_stack[0]) {
+               pr_err("Last potentially related work creation:\n");
+               print_stack(alloc_meta->aux_stack[0]);
+               pr_err("\n");
        }
+       if (alloc_meta->aux_stack[1]) {
+               pr_err("Second to last potentially related work creation:\n");
+               print_stack(alloc_meta->aux_stack[1]);
+               pr_err("\n");
+       }
+#endif
+}
 
+static void describe_object(struct kmem_cache *cache, void *object,
+                               const void *addr, u8 tag)
+{
+       if (kasan_stack_collection_enabled())
+               describe_object_stacks(cache, object, addr, tag);
        describe_object_addr(cache, object, addr);
 }
 
@@ -216,168 +223,6 @@ static inline bool init_task_stack_addr(const void *addr)
                        sizeof(init_thread_union.stack));
 }
 
-static bool __must_check tokenize_frame_descr(const char **frame_descr,
-                                             char *token, size_t max_tok_len,
-                                             unsigned long *value)
-{
-       const char *sep = strchr(*frame_descr, ' ');
-
-       if (sep == NULL)
-               sep = *frame_descr + strlen(*frame_descr);
-
-       if (token != NULL) {
-               const size_t tok_len = sep - *frame_descr;
-
-               if (tok_len + 1 > max_tok_len) {
-                       pr_err("KASAN internal error: frame description too long: %s\n",
-                              *frame_descr);
-                       return false;
-               }
-
-               /* Copy token (+ 1 byte for '\0'). */
-               strlcpy(token, *frame_descr, tok_len + 1);
-       }
-
-       /* Advance frame_descr past separator. */
-       *frame_descr = sep + 1;
-
-       if (value != NULL && kstrtoul(token, 10, value)) {
-               pr_err("KASAN internal error: not a valid number: %s\n", token);
-               return false;
-       }
-
-       return true;
-}
-
-static void print_decoded_frame_descr(const char *frame_descr)
-{
-       /*
-        * We need to parse the following string:
-        *    "n alloc_1 alloc_2 ... alloc_n"
-        * where alloc_i looks like
-        *    "offset size len name"
-        * or "offset size len name:line".
-        */
-
-       char token[64];
-       unsigned long num_objects;
-
-       if (!tokenize_frame_descr(&frame_descr, token, sizeof(token),
-                                 &num_objects))
-               return;
-
-       pr_err("\n");
-       pr_err("this frame has %lu %s:\n", num_objects,
-              num_objects == 1 ? "object" : "objects");
-
-       while (num_objects--) {
-               unsigned long offset;
-               unsigned long size;
-
-               /* access offset */
-               if (!tokenize_frame_descr(&frame_descr, token, sizeof(token),
-                                         &offset))
-                       return;
-               /* access size */
-               if (!tokenize_frame_descr(&frame_descr, token, sizeof(token),
-                                         &size))
-                       return;
-               /* name length (unused) */
-               if (!tokenize_frame_descr(&frame_descr, NULL, 0, NULL))
-                       return;
-               /* object name */
-               if (!tokenize_frame_descr(&frame_descr, token, sizeof(token),
-                                         NULL))
-                       return;
-
-               /* Strip line number; without filename it's not very helpful. */
-               strreplace(token, ':', '\0');
-
-               /* Finally, print object information. */
-               pr_err(" [%lu, %lu) '%s'", offset, offset + size, token);
-       }
-}
-
-static bool __must_check get_address_stack_frame_info(const void *addr,
-                                                     unsigned long *offset,
-                                                     const char **frame_descr,
-                                                     const void **frame_pc)
-{
-       unsigned long aligned_addr;
-       unsigned long mem_ptr;
-       const u8 *shadow_bottom;
-       const u8 *shadow_ptr;
-       const unsigned long *frame;
-
-       BUILD_BUG_ON(IS_ENABLED(CONFIG_STACK_GROWSUP));
-
-       /*
-        * NOTE: We currently only support printing frame information for
-        * accesses to the task's own stack.
-        */
-       if (!object_is_on_stack(addr))
-               return false;
-
-       aligned_addr = round_down((unsigned long)addr, sizeof(long));
-       mem_ptr = round_down(aligned_addr, KASAN_SHADOW_SCALE_SIZE);
-       shadow_ptr = kasan_mem_to_shadow((void *)aligned_addr);
-       shadow_bottom = kasan_mem_to_shadow(end_of_stack(current));
-
-       while (shadow_ptr >= shadow_bottom && *shadow_ptr != KASAN_STACK_LEFT) {
-               shadow_ptr--;
-               mem_ptr -= KASAN_SHADOW_SCALE_SIZE;
-       }
-
-       while (shadow_ptr >= shadow_bottom && *shadow_ptr == KASAN_STACK_LEFT) {
-               shadow_ptr--;
-               mem_ptr -= KASAN_SHADOW_SCALE_SIZE;
-       }
-
-       if (shadow_ptr < shadow_bottom)
-               return false;
-
-       frame = (const unsigned long *)(mem_ptr + KASAN_SHADOW_SCALE_SIZE);
-       if (frame[0] != KASAN_CURRENT_STACK_FRAME_MAGIC) {
-               pr_err("KASAN internal error: frame info validation failed; invalid marker: %lu\n",
-                      frame[0]);
-               return false;
-       }
-
-       *offset = (unsigned long)addr - (unsigned long)frame;
-       *frame_descr = (const char *)frame[1];
-       *frame_pc = (void *)frame[2];
-
-       return true;
-}
-
-static void print_address_stack_frame(const void *addr)
-{
-       unsigned long offset;
-       const char *frame_descr;
-       const void *frame_pc;
-
-       if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
-               return;
-
-       if (!get_address_stack_frame_info(addr, &offset, &frame_descr,
-                                         &frame_pc))
-               return;
-
-       /*
-        * get_address_stack_frame_info only returns true if the given addr is
-        * on the current task's stack.
-        */
-       pr_err("\n");
-       pr_err("addr %px is located in stack of task %s/%d at offset %lu in frame:\n",
-              addr, current->comm, task_pid_nr(current), offset);
-       pr_err(" %pS\n", frame_pc);
-
-       if (!frame_descr)
-               return;
-
-       print_decoded_frame_descr(frame_descr);
-}
-
 static void print_address_description(void *addr, u8 tag)
 {
        struct page *page = kasan_addr_to_page(addr);
@@ -405,62 +250,68 @@ static void print_address_description(void *addr, u8 tag)
        print_address_stack_frame(addr);
 }
 
-static bool row_is_guilty(const void *row, const void *guilty)
+static bool meta_row_is_guilty(const void *row, const void *addr)
 {
-       return (row <= guilty) && (guilty < row + SHADOW_BYTES_PER_ROW);
+       return (row <= addr) && (addr < row + META_MEM_BYTES_PER_ROW);
 }
 
-static int shadow_pointer_offset(const void *row, const void *shadow)
+static int meta_pointer_offset(const void *row, const void *addr)
 {
-       /* The length of ">ff00ff00ff00ff00: " is
-        *    3 + (BITS_PER_LONG/8)*2 chars.
+       /*
+        * Memory state around the buggy address:
+        *  ff00ff00ff00ff00: 00 00 00 05 fe fe fe fe fe fe fe fe fe fe fe fe
+        *  ...
+        *
+        * The length of ">ff00ff00ff00ff00: " is
+        *    3 + (BITS_PER_LONG / 8) * 2 chars.
+        * The length of each granule metadata is 2 bytes
+        *    plus 1 byte for space.
         */
-       return 3 + (BITS_PER_LONG/8)*2 + (shadow - row)*2 +
-               (shadow - row) / SHADOW_BYTES_PER_BLOCK + 1;
+       return 3 + (BITS_PER_LONG / 8) * 2 +
+               (addr - row) / KASAN_GRANULE_SIZE * 3 + 1;
 }
 
-static void print_shadow_for_address(const void *addr)
+static void print_memory_metadata(const void *addr)
 {
        int i;
-       const void *shadow = kasan_mem_to_shadow(addr);
-       const void *shadow_row;
+       void *row;
 
-       shadow_row = (void *)round_down((unsigned long)shadow,
-                                       SHADOW_BYTES_PER_ROW)
-               - SHADOW_ROWS_AROUND_ADDR * SHADOW_BYTES_PER_ROW;
+       row = (void *)round_down((unsigned long)addr, META_MEM_BYTES_PER_ROW)
+                       - META_ROWS_AROUND_ADDR * META_MEM_BYTES_PER_ROW;
 
        pr_err("Memory state around the buggy address:\n");
 
-       for (i = -SHADOW_ROWS_AROUND_ADDR; i <= SHADOW_ROWS_AROUND_ADDR; i++) {
-               const void *kaddr = kasan_shadow_to_mem(shadow_row);
-               char buffer[4 + (BITS_PER_LONG/8)*2];
-               char shadow_buf[SHADOW_BYTES_PER_ROW];
+       for (i = -META_ROWS_AROUND_ADDR; i <= META_ROWS_AROUND_ADDR; i++) {
+               char buffer[4 + (BITS_PER_LONG / 8) * 2];
+               char metadata[META_BYTES_PER_ROW];
 
                snprintf(buffer, sizeof(buffer),
-                       (i == 0) ? ">%px: " : " %px: ", kaddr);
+                               (i == 0) ? ">%px: " : " %px: ", row);
+
                /*
                 * We should not pass a shadow pointer to a generic
                 * function, because generic functions may try to
                 * access the kasan mapping for the passed address.
                 */
-               memcpy(shadow_buf, shadow_row, SHADOW_BYTES_PER_ROW);
+               metadata_fetch_row(&metadata[0], row);
+
                print_hex_dump(KERN_ERR, buffer,
-                       DUMP_PREFIX_NONE, SHADOW_BYTES_PER_ROW, 1,
-                       shadow_buf, SHADOW_BYTES_PER_ROW, 0);
+                       DUMP_PREFIX_NONE, META_BYTES_PER_ROW, 1,
+                       metadata, META_BYTES_PER_ROW, 0);
 
-               if (row_is_guilty(shadow_row, shadow))
-                       pr_err("%*c\n",
-                               shadow_pointer_offset(shadow_row, shadow),
-                               '^');
+               if (meta_row_is_guilty(row, addr))
+                       pr_err("%*c\n", meta_pointer_offset(row, addr), '^');
 
-               shadow_row += SHADOW_BYTES_PER_ROW;
+               row += META_MEM_BYTES_PER_ROW;
        }
 }
 
 static bool report_enabled(void)
 {
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
        if (current->kasan_depth)
                return false;
+#endif
        if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
                return true;
        return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags);
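
meta_pointer_offset() is easiest to verify with a concrete case; assuming a
64-bit kernel (BITS_PER_LONG == 64) and the row format shown in the comment:

    " ff00ff00ff00ff00: "  ->  3 + (64 / 8) * 2 = 19 columns of prefix
    each granule ("xx ")   ->  3 columns
    addr 5 granules in     ->  caret printed at column 19 + 5 * 3 + 1 = 35
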
@@ -490,7 +341,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip)
        unsigned long flags;
        u8 tag = get_tag(object);
 
-       object = reset_tag(object);
+       object = kasan_reset_tag(object);
 
 #if IS_ENABLED(CONFIG_KUNIT)
        if (current->kunit_test)
@@ -503,7 +354,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip)
        pr_err("\n");
        print_address_description(object, tag);
        pr_err("\n");
-       print_shadow_for_address(object);
+       print_memory_metadata(object);
        end_report(&flags);
 }
 
@@ -523,10 +374,10 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write,
        disable_trace_on_warning();
 
        tagged_addr = (void *)addr;
-       untagged_addr = reset_tag(tagged_addr);
+       untagged_addr = kasan_reset_tag(tagged_addr);
 
        info.access_addr = tagged_addr;
-       if (addr_has_shadow(untagged_addr))
+       if (addr_has_metadata(untagged_addr))
                info.first_bad_addr = find_first_bad_addr(tagged_addr, size);
        else
                info.first_bad_addr = untagged_addr;
@@ -537,14 +388,14 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write,
        start_report(&flags);
 
        print_error_description(&info);
-       if (addr_has_shadow(untagged_addr))
+       if (addr_has_metadata(untagged_addr))
                print_tags(get_tag(tagged_addr), info.first_bad_addr);
        pr_err("\n");
 
-       if (addr_has_shadow(untagged_addr)) {
+       if (addr_has_metadata(untagged_addr)) {
                print_address_description(untagged_addr, get_tag(tagged_addr));
                pr_err("\n");
-               print_shadow_for_address(info.first_bad_addr);
+               print_memory_metadata(info.first_bad_addr);
        } else {
                dump_stack();
        }
@@ -604,6 +455,6 @@ void kasan_non_canonical_hook(unsigned long addr)
        else
                bug_type = "maybe wild-memory-access";
        pr_alert("KASAN: %s in range [0x%016lx-0x%016lx]\n", bug_type,
-                orig_addr, orig_addr + KASAN_SHADOW_MASK);
+                orig_addr, orig_addr + KASAN_GRANULE_SIZE - 1);
 }
 #endif
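
Note also the last hunk: a shadow byte (or a memory tag) resolves an access
only to granule precision, so kasan_non_canonical_hook() prints a range one
granule wide; with generic KASAN's 8-byte granules that is [addr, addr + 7].
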
diff --git a/mm/kasan/report_generic.c b/mm/kasan/report_generic.c
new file mode 100644 (file)
index 0000000..8a9c889
--- /dev/null
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file contains generic KASAN specific error reporting code.
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ * Some code borrowed from https://github.com/xairy/kasan-prototype by
+ *        Andrey Konovalov <andreyknvl@gmail.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/ftrace.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/slab.h>
+#include <linux/stackdepot.h>
+#include <linux/stacktrace.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/kasan.h>
+#include <linux/module.h>
+
+#include <asm/sections.h>
+
+#include "kasan.h"
+#include "../slab.h"
+
+void *find_first_bad_addr(void *addr, size_t size)
+{
+       void *p = addr;
+
+       while (p < addr + size && !(*(u8 *)kasan_mem_to_shadow(p)))
+               p += KASAN_GRANULE_SIZE;
+       return p;
+}
+
+static const char *get_shadow_bug_type(struct kasan_access_info *info)
+{
+       const char *bug_type = "unknown-crash";
+       u8 *shadow_addr;
+
+       shadow_addr = (u8 *)kasan_mem_to_shadow(info->first_bad_addr);
+
+       /*
+        * If shadow byte value is in [0, KASAN_GRANULE_SIZE) we can look
+        * at the next shadow byte to determine the type of the bad access.
+        */
+       if (*shadow_addr > 0 && *shadow_addr <= KASAN_GRANULE_SIZE - 1)
+               shadow_addr++;
+
+       switch (*shadow_addr) {
+       case 0 ... KASAN_GRANULE_SIZE - 1:
+               /*
+                * In theory it's still possible to see these shadow values
+                * due to a data race in the kernel code.
+                */
+               bug_type = "out-of-bounds";
+               break;
+       case KASAN_PAGE_REDZONE:
+       case KASAN_KMALLOC_REDZONE:
+               bug_type = "slab-out-of-bounds";
+               break;
+       case KASAN_GLOBAL_REDZONE:
+               bug_type = "global-out-of-bounds";
+               break;
+       case KASAN_STACK_LEFT:
+       case KASAN_STACK_MID:
+       case KASAN_STACK_RIGHT:
+       case KASAN_STACK_PARTIAL:
+               bug_type = "stack-out-of-bounds";
+               break;
+       case KASAN_FREE_PAGE:
+       case KASAN_KMALLOC_FREE:
+       case KASAN_KMALLOC_FREETRACK:
+               bug_type = "use-after-free";
+               break;
+       case KASAN_ALLOCA_LEFT:
+       case KASAN_ALLOCA_RIGHT:
+               bug_type = "alloca-out-of-bounds";
+               break;
+       case KASAN_VMALLOC_INVALID:
+               bug_type = "vmalloc-out-of-bounds";
+               break;
+       }
+
+       return bug_type;
+}
+
+static const char *get_wild_bug_type(struct kasan_access_info *info)
+{
+       const char *bug_type = "unknown-crash";
+
+       if ((unsigned long)info->access_addr < PAGE_SIZE)
+               bug_type = "null-ptr-deref";
+       else if ((unsigned long)info->access_addr < TASK_SIZE)
+               bug_type = "user-memory-access";
+       else
+               bug_type = "wild-memory-access";
+
+       return bug_type;
+}
+
+const char *get_bug_type(struct kasan_access_info *info)
+{
+       /*
+        * A negative access size, once cast to size_t, turns into a huge
+        * value larger than ULONG_MAX/2, making access_addr + access_size
+        * wrap around. Classify such accesses as out-of-bounds.
+        */
+       if (info->access_addr + info->access_size < info->access_addr)
+               return "out-of-bounds";
+
+       if (addr_has_metadata(info->access_addr))
+               return get_shadow_bug_type(info);
+       return get_wild_bug_type(info);
+}
+
+void metadata_fetch_row(char *buffer, void *row)
+{
+       memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW);
+}
+
+#if CONFIG_KASAN_STACK
+static bool __must_check tokenize_frame_descr(const char **frame_descr,
+                                             char *token, size_t max_tok_len,
+                                             unsigned long *value)
+{
+       const char *sep = strchr(*frame_descr, ' ');
+
+       if (sep == NULL)
+               sep = *frame_descr + strlen(*frame_descr);
+
+       if (token != NULL) {
+               const size_t tok_len = sep - *frame_descr;
+
+               if (tok_len + 1 > max_tok_len) {
+                       pr_err("KASAN internal error: frame description too long: %s\n",
+                              *frame_descr);
+                       return false;
+               }
+
+               /* Copy token (+ 1 byte for '\0'). */
+               strlcpy(token, *frame_descr, tok_len + 1);
+       }
+
+       /* Advance frame_descr past separator. */
+       *frame_descr = sep + 1;
+
+       if (value != NULL && kstrtoul(token, 10, value)) {
+               pr_err("KASAN internal error: not a valid number: %s\n", token);
+               return false;
+       }
+
+       return true;
+}
+
+static void print_decoded_frame_descr(const char *frame_descr)
+{
+       /*
+        * We need to parse the following string:
+        *    "n alloc_1 alloc_2 ... alloc_n"
+        * where alloc_i looks like
+        *    "offset size len name"
+        * or "offset size len name:line".
+        */
+
+       char token[64];
+       unsigned long num_objects;
+
+       if (!tokenize_frame_descr(&frame_descr, token, sizeof(token),
+                                 &num_objects))
+               return;
+
+       pr_err("\n");
+       pr_err("this frame has %lu %s:\n", num_objects,
+              num_objects == 1 ? "object" : "objects");
+
+       while (num_objects--) {
+               unsigned long offset;
+               unsigned long size;
+
+               /* access offset */
+               if (!tokenize_frame_descr(&frame_descr, token, sizeof(token),
+                                         &offset))
+                       return;
+               /* access size */
+               if (!tokenize_frame_descr(&frame_descr, token, sizeof(token),
+                                         &size))
+                       return;
+               /* name length (unused) */
+               if (!tokenize_frame_descr(&frame_descr, NULL, 0, NULL))
+                       return;
+               /* object name */
+               if (!tokenize_frame_descr(&frame_descr, token, sizeof(token),
+                                         NULL))
+                       return;
+
+               /* Strip line number; without filename it's not very helpful. */
+               strreplace(token, ':', '\0');
+
+               /* Finally, print object information. */
+               pr_err(" [%lu, %lu) '%s'", offset, offset + size, token);
+       }
+}
+
+static bool __must_check get_address_stack_frame_info(const void *addr,
+                                                     unsigned long *offset,
+                                                     const char **frame_descr,
+                                                     const void **frame_pc)
+{
+       unsigned long aligned_addr;
+       unsigned long mem_ptr;
+       const u8 *shadow_bottom;
+       const u8 *shadow_ptr;
+       const unsigned long *frame;
+
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_STACK_GROWSUP));
+
+       /*
+        * NOTE: We currently only support printing frame information for
+        * accesses to the task's own stack.
+        */
+       if (!object_is_on_stack(addr))
+               return false;
+
+       aligned_addr = round_down((unsigned long)addr, sizeof(long));
+       mem_ptr = round_down(aligned_addr, KASAN_GRANULE_SIZE);
+       shadow_ptr = kasan_mem_to_shadow((void *)aligned_addr);
+       shadow_bottom = kasan_mem_to_shadow(end_of_stack(current));
+
+       while (shadow_ptr >= shadow_bottom && *shadow_ptr != KASAN_STACK_LEFT) {
+               shadow_ptr--;
+               mem_ptr -= KASAN_GRANULE_SIZE;
+       }
+
+       while (shadow_ptr >= shadow_bottom && *shadow_ptr == KASAN_STACK_LEFT) {
+               shadow_ptr--;
+               mem_ptr -= KASAN_GRANULE_SIZE;
+       }
+
+       if (shadow_ptr < shadow_bottom)
+               return false;
+
+       frame = (const unsigned long *)(mem_ptr + KASAN_GRANULE_SIZE);
+       if (frame[0] != KASAN_CURRENT_STACK_FRAME_MAGIC) {
+               pr_err("KASAN internal error: frame info validation failed; invalid marker: %lu\n",
+                      frame[0]);
+               return false;
+       }
+
+       *offset = (unsigned long)addr - (unsigned long)frame;
+       *frame_descr = (const char *)frame[1];
+       *frame_pc = (void *)frame[2];
+
+       return true;
+}
+
+void print_address_stack_frame(const void *addr)
+{
+       unsigned long offset;
+       const char *frame_descr;
+       const void *frame_pc;
+
+       if (!get_address_stack_frame_info(addr, &offset, &frame_descr,
+                                         &frame_pc))
+               return;
+
+       /*
+        * get_address_stack_frame_info only returns true if the given addr is
+        * on the current task's stack.
+        */
+       pr_err("\n");
+       pr_err("addr %px is located in stack of task %s/%d at offset %lu in frame:\n",
+              addr, current->comm, task_pid_nr(current), offset);
+       pr_err(" %pS\n", frame_pc);
+
+       if (!frame_descr)
+               return;
+
+       print_decoded_frame_descr(frame_descr);
+}
+#endif /* CONFIG_KASAN_STACK */
+
+#define DEFINE_ASAN_REPORT_LOAD(size)                     \
+void __asan_report_load##size##_noabort(unsigned long addr) \
+{                                                         \
+       kasan_report(addr, size, false, _RET_IP_);        \
+}                                                         \
+EXPORT_SYMBOL(__asan_report_load##size##_noabort)
+
+#define DEFINE_ASAN_REPORT_STORE(size)                     \
+void __asan_report_store##size##_noabort(unsigned long addr) \
+{                                                          \
+       kasan_report(addr, size, true, _RET_IP_);          \
+}                                                          \
+EXPORT_SYMBOL(__asan_report_store##size##_noabort)
+
+DEFINE_ASAN_REPORT_LOAD(1);
+DEFINE_ASAN_REPORT_LOAD(2);
+DEFINE_ASAN_REPORT_LOAD(4);
+DEFINE_ASAN_REPORT_LOAD(8);
+DEFINE_ASAN_REPORT_LOAD(16);
+DEFINE_ASAN_REPORT_STORE(1);
+DEFINE_ASAN_REPORT_STORE(2);
+DEFINE_ASAN_REPORT_STORE(4);
+DEFINE_ASAN_REPORT_STORE(8);
+DEFINE_ASAN_REPORT_STORE(16);
+
+void __asan_report_load_n_noabort(unsigned long addr, size_t size)
+{
+       kasan_report(addr, size, false, _RET_IP_);
+}
+EXPORT_SYMBOL(__asan_report_load_n_noabort);
+
+void __asan_report_store_n_noabort(unsigned long addr, size_t size)
+{
+       kasan_report(addr, size, true, _RET_IP_);
+}
+EXPORT_SYMBOL(__asan_report_store_n_noabort);
diff --git a/mm/kasan/report_hw_tags.c b/mm/kasan/report_hw_tags.c
new file mode 100644 (file)
index 0000000..57114f0
--- /dev/null
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file contains hardware tag-based KASAN specific error reporting code.
+ *
+ * Copyright (c) 2020 Google, Inc.
+ * Author: Andrey Konovalov <andreyknvl@google.com>
+ */
+
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memory.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "kasan.h"
+
+const char *get_bug_type(struct kasan_access_info *info)
+{
+       return "invalid-access";
+}
+
+void *find_first_bad_addr(void *addr, size_t size)
+{
+       return kasan_reset_tag(addr);
+}
+
+void metadata_fetch_row(char *buffer, void *row)
+{
+       int i;
+
+       for (i = 0; i < META_BYTES_PER_ROW; i++)
+               buffer[i] = hw_get_mem_tag(row + i * KASAN_GRANULE_SIZE);
+}
+
+void print_tags(u8 addr_tag, const void *addr)
+{
+       u8 memory_tag = hw_get_mem_tag((void *)addr);
+
+       pr_err("Pointer tag: [%02x], memory tag: [%02x]\n",
+               addr_tag, memory_tag);
+}
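
For hardware tag-based mode the header above carries the whole story: the
bug type is always "invalid-access", since MTE reports a tag mismatch but
not its cause, plus one print_tags() line of the form (tag values
hypothetical):

    Pointer tag: [3a], memory tag: [5c]
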
similarity index 78%
rename from mm/kasan/tags_report.c
rename to mm/kasan/report_sw_tags.c
index bee4371..1b02679 100644 (file)
@@ -1,17 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * This file contains tag-based KASAN specific error reporting code.
+ * This file contains software tag-based KASAN specific error reporting code.
  *
  * Copyright (c) 2014 Samsung Electronics Co., Ltd.
  * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
  *
  * Some code borrowed from https://github.com/xairy/kasan-prototype by
  *        Andrey Konovalov <andreyknvl@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
 #include <linux/bitops.h>
@@ -46,16 +41,19 @@ const char *get_bug_type(struct kasan_access_info *info)
        int i;
 
        tag = get_tag(info->access_addr);
-       addr = reset_tag(info->access_addr);
+       addr = kasan_reset_tag(info->access_addr);
        page = kasan_addr_to_page(addr);
        if (page && PageSlab(page)) {
                cache = page->slab_cache;
                object = nearest_obj(cache, page, (void *)addr);
-               alloc_meta = get_alloc_info(cache, object);
+               alloc_meta = kasan_get_alloc_meta(cache, object);
 
-               for (i = 0; i < KASAN_NR_FREE_STACKS; i++)
-                       if (alloc_meta->free_pointer_tag[i] == tag)
-                               return "use-after-free";
+               if (alloc_meta) {
+                       for (i = 0; i < KASAN_NR_FREE_STACKS; i++) {
+                               if (alloc_meta->free_pointer_tag[i] == tag)
+                                       return "use-after-free";
+                       }
+               }
                return "out-of-bounds";
        }
 
@@ -77,14 +75,19 @@ const char *get_bug_type(struct kasan_access_info *info)
 void *find_first_bad_addr(void *addr, size_t size)
 {
        u8 tag = get_tag(addr);
-       void *p = reset_tag(addr);
+       void *p = kasan_reset_tag(addr);
        void *end = p + size;
 
        while (p < end && tag == *(u8 *)kasan_mem_to_shadow(p))
-               p += KASAN_SHADOW_SCALE_SIZE;
+               p += KASAN_GRANULE_SIZE;
        return p;
 }
 
+void metadata_fetch_row(char *buffer, void *row)
+{
+       memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW);
+}
+
 void print_tags(u8 addr_tag, const void *addr)
 {
        u8 *shadow = (u8 *)kasan_mem_to_shadow(addr);
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
new file mode 100644 (file)
index 0000000..7c2c08c
--- /dev/null
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file contains KASAN runtime code that manages shadow memory for
+ * generic and software tag-based KASAN modes.
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ * Some code borrowed from https://github.com/xairy/kasan-prototype by
+ *        Andrey Konovalov <andreyknvl@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/kmemleak.h>
+#include <linux/memory.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+#include "kasan.h"
+
+bool __kasan_check_read(const volatile void *p, unsigned int size)
+{
+       return check_memory_region((unsigned long)p, size, false, _RET_IP_);
+}
+EXPORT_SYMBOL(__kasan_check_read);
+
+bool __kasan_check_write(const volatile void *p, unsigned int size)
+{
+       return check_memory_region((unsigned long)p, size, true, _RET_IP_);
+}
+EXPORT_SYMBOL(__kasan_check_write);
+
+#undef memset
+void *memset(void *addr, int c, size_t len)
+{
+       if (!check_memory_region((unsigned long)addr, len, true, _RET_IP_))
+               return NULL;
+
+       return __memset(addr, c, len);
+}
+
+#ifdef __HAVE_ARCH_MEMMOVE
+#undef memmove
+void *memmove(void *dest, const void *src, size_t len)
+{
+       if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
+           !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
+               return NULL;
+
+       return __memmove(dest, src, len);
+}
+#endif
+
+#undef memcpy
+void *memcpy(void *dest, const void *src, size_t len)
+{
+       if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
+           !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
+               return NULL;
+
+       return __memcpy(dest, src, len);
+}
+
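
A minimal sketch of what these interceptors buy us, assuming a generic
KASAN kernel with slab redzones (the demo function and the sizes below
are hypothetical):

#include <linux/slab.h>
#include <linux/string.h>

static void interceptor_demo(void)
{
	char src[16] = { 0 };
	char *p = kmalloc(8, GFP_KERNEL);	/* 8 valid bytes, redzone after */

	if (!p)
		return;
	/*
	 * A 16-byte copy into an 8-byte object: check_memory_region()
	 * fails on the destination, KASAN prints an out-of-bounds report,
	 * and the interceptor returns NULL without calling __memcpy(),
	 * so the redzone is never written.
	 */
	memcpy(p, src, 16);
	kfree(p);
}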
+/*
+ * Poisons the shadow memory for 'size' bytes starting from 'addr'.
+ * Memory addresses should be aligned to KASAN_GRANULE_SIZE.
+ */
+void poison_range(const void *address, size_t size, u8 value)
+{
+       void *shadow_start, *shadow_end;
+
+       /*
+        * Perform shadow offset calculation based on untagged address, as
+        * some of the callers (e.g. kasan_poison_object_data) pass tagged
+        * addresses to this function.
+        */
+       address = kasan_reset_tag(address);
+       size = round_up(size, KASAN_GRANULE_SIZE);
+
+       shadow_start = kasan_mem_to_shadow(address);
+       shadow_end = kasan_mem_to_shadow(address + size);
+
+       __memset(shadow_start, value, shadow_end - shadow_start);
+}
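
For reference, the shadow translation poison_range() builds on is a
simple shift-and-offset; a sketch mirroring kasan_mem_to_shadow() from
<linux/kasan.h>, assuming the conventional layout where
KASAN_SHADOW_SCALE_SHIFT is 3 (one shadow byte per 8-byte granule):

static inline void *sketch_mem_to_shadow(const void *addr)
{
	/* One shadow byte describes KASAN_GRANULE_SIZE bytes of memory. */
	return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
		+ KASAN_SHADOW_OFFSET;
}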
+
+void unpoison_range(const void *address, size_t size)
+{
+       u8 tag = get_tag(address);
+
+       /*
+        * Perform shadow offset calculation based on untagged address, as
+        * some of the callers (e.g. kasan_unpoison_object_data) pass tagged
+        * addresses to this function.
+        */
+       address = kasan_reset_tag(address);
+
+       poison_range(address, size, tag);
+
+       if (size & KASAN_GRANULE_MASK) {
+               u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size);
+
+               if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
+                       *shadow = tag;
+               else /* CONFIG_KASAN_GENERIC */
+                       *shadow = size & KASAN_GRANULE_MASK;
+       }
+}
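
A worked example of the partial-granule encoding above (generic mode,
KASAN_GRANULE_SIZE == 8; the size is hypothetical):

/*
 * unpoison_range(p, 41) zeroes five shadow bytes for the five fully
 * addressable granules, then stores 41 & KASAN_GRANULE_MASK == 1 in
 * the sixth, so only the first byte of the last granule is valid and
 * p[41] onwards still reports out-of-bounds.
 */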
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static bool shadow_mapped(unsigned long addr)
+{
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       if (pgd_none(*pgd))
+               return false;
+       p4d = p4d_offset(pgd, addr);
+       if (p4d_none(*p4d))
+               return false;
+       pud = pud_offset(p4d, addr);
+       if (pud_none(*pud))
+               return false;
+
+       /*
+        * We can't use pud_large() or pud_huge(): the former is
+        * arch-specific and the latter depends on HUGETLB_PAGE. So let's
+        * abuse pud_bad(): if the pud is bad, it's bad because it's huge.
+        */
+       if (pud_bad(*pud))
+               return true;
+       pmd = pmd_offset(pud, addr);
+       if (pmd_none(*pmd))
+               return false;
+
+       if (pmd_bad(*pmd))
+               return true;
+       pte = pte_offset_kernel(pmd, addr);
+       return !pte_none(*pte);
+}
+
+static int __meminit kasan_mem_notifier(struct notifier_block *nb,
+                       unsigned long action, void *data)
+{
+       struct memory_notify *mem_data = data;
+       unsigned long nr_shadow_pages, start_kaddr, shadow_start;
+       unsigned long shadow_end, shadow_size;
+
+       nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT;
+       start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn);
+       shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr);
+       shadow_size = nr_shadow_pages << PAGE_SHIFT;
+       shadow_end = shadow_start + shadow_size;
+
+       if (WARN_ON(mem_data->nr_pages % KASAN_GRANULE_SIZE) ||
+               WARN_ON(start_kaddr % KASAN_MEMORY_PER_SHADOW_PAGE))
+               return NOTIFY_BAD;
+
+       switch (action) {
+       case MEM_GOING_ONLINE: {
+               void *ret;
+
+               /*
+                * If shadow is mapped already then it must have been mapped
+                * during boot. This can happen if we are onlining previously
+                * offlined memory.
+                */
+               if (shadow_mapped(shadow_start))
+                       return NOTIFY_OK;
+
+               ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
+                                       shadow_end, GFP_KERNEL,
+                                       PAGE_KERNEL, VM_NO_GUARD,
+                                       pfn_to_nid(mem_data->start_pfn),
+                                       __builtin_return_address(0));
+               if (!ret)
+                       return NOTIFY_BAD;
+
+               kmemleak_ignore(ret);
+               return NOTIFY_OK;
+       }
+       case MEM_CANCEL_ONLINE:
+       case MEM_OFFLINE: {
+               struct vm_struct *vm;
+
+               /*
+                * shadow_start was either mapped during boot by kasan_init()
+                * or during memory online by __vmalloc_node_range().
+                * In the latter case we can use vfree() to free the shadow.
+                * A non-NULL result from find_vm_area() tells us that it
+                * was indeed the latter case.
+                *
+                * Currently it's not possible to free shadow mapped
+                * during boot by kasan_init(), because the code to do
+                * that hasn't been written yet. So we'll just leak the
+                * memory.
+                */
+               vm = find_vm_area((void *)shadow_start);
+               if (vm)
+                       vfree((void *)shadow_start);
+       }
+       }
+
+       return NOTIFY_OK;
+}
+
+static int __init kasan_memhotplug_init(void)
+{
+       hotplug_memory_notifier(kasan_mem_notifier, 0);
+
+       return 0;
+}
+
+core_initcall(kasan_memhotplug_init);
+#endif
+
+#ifdef CONFIG_KASAN_VMALLOC
+
+static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
+                                     void *unused)
+{
+       unsigned long page;
+       pte_t pte;
+
+       if (likely(!pte_none(*ptep)))
+               return 0;
+
+       page = __get_free_page(GFP_KERNEL);
+       if (!page)
+               return -ENOMEM;
+
+       memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
+       pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);
+
+       spin_lock(&init_mm.page_table_lock);
+       if (likely(pte_none(*ptep))) {
+               set_pte_at(&init_mm, addr, ptep, pte);
+               page = 0;
+       }
+       spin_unlock(&init_mm.page_table_lock);
+       if (page)
+               free_page(page);
+       return 0;
+}
+
+int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
+{
+       unsigned long shadow_start, shadow_end;
+       int ret;
+
+       if (!is_vmalloc_or_module_addr((void *)addr))
+               return 0;
+
+       shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
+       shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
+       shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
+       shadow_end = ALIGN(shadow_end, PAGE_SIZE);
+
+       ret = apply_to_page_range(&init_mm, shadow_start,
+                                 shadow_end - shadow_start,
+                                 kasan_populate_vmalloc_pte, NULL);
+       if (ret)
+               return ret;
+
+       flush_cache_vmap(shadow_start, shadow_end);
+
+       /*
+        * We need to be careful about inter-cpu effects here. Consider:
+        *
+        *   CPU#0                                CPU#1
+        * WRITE_ONCE(p, vmalloc(100));         while (x = READ_ONCE(p)) ;
+        *                                      p[99] = 1;
+        *
+        * With compiler instrumentation, that ends up looking like this:
+        *
+        *   CPU#0                                CPU#1
+        * // vmalloc() allocates memory
+        * // let a = area->addr
+        * // we reach kasan_populate_vmalloc
+        * // and call unpoison_range:
+        * STORE shadow(a), unpoison_val
+        * ...
+        * STORE shadow(a+99), unpoison_val     x = LOAD p
+        * // rest of vmalloc process           <data dependency>
+        * STORE p, a                           LOAD shadow(x+99)
+        *
+        * If there is no barrier between the end of unpoisoning the shadow
+        * and the store of the result to p, the stores could be committed
+        * in a different order by CPU#0, and CPU#1 could erroneously observe
+        * poison in the shadow.
+        *
+        * We need some sort of barrier between the stores.
+        *
+        * In the vmalloc() case, this is provided by a smp_wmb() in
+        * clear_vm_uninitialized_flag(). In the per-cpu allocator and in
+        * get_vm_area() and friends, the caller gets shadow allocated but
+        * doesn't have any pages mapped into the virtual address space that
+        * has been reserved. Mapping those pages in will involve taking and
+        * releasing a page-table lock, which will provide the barrier.
+        */
+
+       return 0;
+}
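
A condensed sketch of the pairing the comment above describes; the
smp_wmb() shown stands in for the one in clear_vm_uninitialized_flag(),
and the variable names are illustrative:

/* CPU#0: vmalloc() path, after kasan_populate_vmalloc() */
unpoison_range(area->addr, size);	/* STOREs to the shadow */
smp_wmb();				/* order shadow STOREs ... */
WRITE_ONCE(p, area->addr);		/* ... before publishing */

/* CPU#1: consumer */
x = READ_ONCE(p);			/* LOAD the pointer */
x[99] = 1;				/* the instrumented shadow LOAD has
					 * an address dependency on x, so it
					 * cannot observe pre-unpoison data */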
+
+/*
+ * Poison the shadow for a vmalloc region. Called as part of the
+ * freeing process at the time the region is freed.
+ */
+void kasan_poison_vmalloc(const void *start, unsigned long size)
+{
+       if (!is_vmalloc_or_module_addr(start))
+               return;
+
+       size = round_up(size, KASAN_GRANULE_SIZE);
+       poison_range(start, size, KASAN_VMALLOC_INVALID);
+}
+
+void kasan_unpoison_vmalloc(const void *start, unsigned long size)
+{
+       if (!is_vmalloc_or_module_addr(start))
+               return;
+
+       unpoison_range(start, size);
+}
+
+static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
+                                       void *unused)
+{
+       unsigned long page;
+
+       page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);
+
+       spin_lock(&init_mm.page_table_lock);
+
+       if (likely(!pte_none(*ptep))) {
+               pte_clear(&init_mm, addr, ptep);
+               free_page(page);
+       }
+       spin_unlock(&init_mm.page_table_lock);
+
+       return 0;
+}
+
+/*
+ * Release the backing for the vmalloc region [start, end), which
+ * lies within the free region [free_region_start, free_region_end).
+ *
+ * This can be run lazily, long after the region was freed. It runs
+ * under free_vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
+ * infrastructure.
+ *
+ * How does this work?
+ * -------------------
+ *
+ * We have a region that is page aligned, labelled as A.
+ * That might not map onto the shadow in a way that is page-aligned:
+ *
+ *                    start                     end
+ *                    v                         v
+ * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc
+ *  -------- -------- --------          -------- --------
+ *      |        |       |                 |        |
+ *      |        |       |         /-------/        |
+ *      \-------\|/------/         |/---------------/
+ *              |||                ||
+ *             |??AAAAAA|AAAAAAAA|AA??????|                < shadow
+ *                 (1)      (2)      (3)
+ *
+ * First we align the start upwards and the end downwards, so that the
+ * shadow of the region aligns with shadow page boundaries. In the
+ * example, this gives us the shadow page (2). This is the shadow entirely
+ * covered by this allocation.
+ *
+ * Then we have the tricky bits. We want to know if we can free the
+ * partially covered shadow pages - (1) and (3) in the example. For this,
+ * we are given the start and end of the free region that contains this
+ * allocation. Extending our previous example, we could have:
+ *
+ *  free_region_start                                    free_region_end
+ *  |                 start                     end      |
+ *  v                 v                         v        v
+ * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc
+ *  -------- -------- --------          -------- --------
+ *      |        |       |                 |        |
+ *      |        |       |         /-------/        |
+ *      \-------\|/------/         |/---------------/
+ *              |||                ||
+ *             |FFAAAAAA|AAAAAAAA|AAF?????|                < shadow
+ *                 (1)      (2)      (3)
+ *
+ * Once again, we align the start of the free region up, and the end of
+ * the free region down so that the shadow is page aligned. So we can free
+ * page (1) - we know no allocation currently uses anything in that page,
+ * because all of it is in the vmalloc free region. But we cannot free
+ * page (3), because we can't be sure that the rest of it is unused.
+ *
+ * We only consider pages that contain part of the original region for
+ * freeing: we don't try to free other pages from the free region or we'd
+ * end up trying to free huge chunks of virtual address space.
+ *
+ * Concurrency
+ * -----------
+ *
+ * How do we know that we're not freeing a page that is simultaneously
+ * being used for a fresh allocation in kasan_populate_vmalloc(_pte)?
+ *
+ * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running
+ * at the same time. While we run under free_vmap_area_lock, the population
+ * code does not.
+ *
+ * free_vmap_area_lock instead operates to ensure that the larger range
+ * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and
+ * the per-cpu region-finding algorithm both run under free_vmap_area_lock,
+ * no space identified as free will become used while we are running. This
+ * means that so long as we are careful with alignment and only free shadow
+ * pages entirely covered by the free region, we will not run into any
+ * trouble - any simultaneous allocations will be for disjoint regions.
+ */
+void kasan_release_vmalloc(unsigned long start, unsigned long end,
+                          unsigned long free_region_start,
+                          unsigned long free_region_end)
+{
+       void *shadow_start, *shadow_end;
+       unsigned long region_start, region_end;
+       unsigned long size;
+
+       region_start = ALIGN(start, KASAN_MEMORY_PER_SHADOW_PAGE);
+       region_end = ALIGN_DOWN(end, KASAN_MEMORY_PER_SHADOW_PAGE);
+
+       free_region_start = ALIGN(free_region_start, KASAN_MEMORY_PER_SHADOW_PAGE);
+
+       if (start != region_start &&
+           free_region_start < region_start)
+               region_start -= KASAN_MEMORY_PER_SHADOW_PAGE;
+
+       free_region_end = ALIGN_DOWN(free_region_end, KASAN_MEMORY_PER_SHADOW_PAGE);
+
+       if (end != region_end &&
+           free_region_end > region_end)
+               region_end += KASAN_MEMORY_PER_SHADOW_PAGE;
+
+       shadow_start = kasan_mem_to_shadow((void *)region_start);
+       shadow_end = kasan_mem_to_shadow((void *)region_end);
+
+       if (shadow_end > shadow_start) {
+               size = shadow_end - shadow_start;
+               apply_to_existing_page_range(&init_mm,
+                                            (unsigned long)shadow_start,
+                                            size, kasan_depopulate_vmalloc_pte,
+                                            NULL);
+               flush_tlb_kernel_range((unsigned long)shadow_start,
+                                      (unsigned long)shadow_end);
+       }
+}
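
To make the alignment rules concrete, a worked example with hypothetical
numbers (PAGE_SIZE == 4K and 8-byte granules, so
KASAN_MEMORY_PER_SHADOW_PAGE == 32K):

/*
 * kasan_release_vmalloc(start = 40K, end = 72K,
 *                       free_region_start = 0K, free_region_end = 96K):
 *
 *   region_start = ALIGN(40K, 32K)      = 64K
 *   region_end   = ALIGN_DOWN(72K, 32K) = 64K
 *   start != region_start and free_region_start (0K) < 64K,
 *     so region_start -= 32K            -> 32K
 *   end != region_end and free_region_end (96K) > 64K,
 *     so region_end += 32K              -> 96K
 *
 * The shadow for [32K, 96K) - two shadow pages, each covering part of
 * the original region and otherwise only free memory - is released.
 */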
+
+#else /* CONFIG_KASAN_VMALLOC */
+
+int kasan_module_alloc(void *addr, size_t size)
+{
+       void *ret;
+       size_t scaled_size;
+       size_t shadow_size;
+       unsigned long shadow_start;
+
+       shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
+       scaled_size = (size + KASAN_GRANULE_SIZE - 1) >>
+                               KASAN_SHADOW_SCALE_SHIFT;
+       shadow_size = round_up(scaled_size, PAGE_SIZE);
+
+       if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
+               return -EINVAL;
+
+       ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
+                       shadow_start + shadow_size,
+                       GFP_KERNEL,
+                       PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
+                       __builtin_return_address(0));
+
+       if (ret) {
+               __memset(ret, KASAN_SHADOW_INIT, shadow_size);
+               find_vm_area(addr)->flags |= VM_KASAN;
+               kmemleak_ignore(ret);
+               return 0;
+       }
+
+       return -ENOMEM;
+}
+
+void kasan_free_shadow(const struct vm_struct *vm)
+{
+       if (vm->flags & VM_KASAN)
+               vfree(kasan_mem_to_shadow(vm->addr));
+}
+
+#endif
similarity index 88%
rename from mm/kasan/tags.c
rename to mm/kasan/sw_tags.c
index e02a36a..5dcd830 100644 (file)
@@ -1,17 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * This file contains core tag-based KASAN code.
+ * This file contains core software tag-based KASAN code.
  *
  * Copyright (c) 2018 Google, Inc.
  * Author: Andrey Konovalov <andreyknvl@google.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define pr_fmt(fmt) "kasan: " fmt
 
 #include <linux/export.h>
 #include <linux/interrupt.h>
 
 static DEFINE_PER_CPU(u32, prng_state);
 
-void kasan_init_tags(void)
+void __init kasan_init_sw_tags(void)
 {
        int cpu;
 
        for_each_possible_cpu(cpu)
                per_cpu(prng_state, cpu) = (u32)get_cycles();
+
+       pr_info("KernelAddressSanitizer initialized\n");
 }
 
 /*
@@ -70,11 +67,6 @@ u8 random_tag(void)
        return (u8)(state % (KASAN_TAG_MAX + 1));
 }
 
-void *kasan_reset_tag(const void *addr)
-{
-       return reset_tag(addr);
-}
-
 bool check_memory_region(unsigned long addr, size_t size, bool write,
                                unsigned long ret_ip)
 {
@@ -110,7 +102,7 @@ bool check_memory_region(unsigned long addr, size_t size, bool write,
        if (tag == KASAN_TAG_KERNEL)
                return true;
 
-       untagged_addr = reset_tag((const void *)addr);
+       untagged_addr = kasan_reset_tag((const void *)addr);
        if (unlikely(untagged_addr <
                        kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) {
                return !kasan_report(addr, size, write, ret_ip);
@@ -126,6 +118,15 @@ bool check_memory_region(unsigned long addr, size_t size, bool write,
        return true;
 }
 
+bool check_invalid_free(void *addr)
+{
+       u8 tag = get_tag(addr);
+       u8 shadow_byte = READ_ONCE(*(u8 *)kasan_mem_to_shadow(kasan_reset_tag(addr)));
+
+       return (shadow_byte == KASAN_TAG_INVALID) ||
+               (tag != KASAN_TAG_KERNEL && tag != shadow_byte);
+}
+
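Two hypothetical failure modes this check catches:

/*
 * 1) Double-free: the first kfree(p) poisons p's shadow with
 *    KASAN_TAG_INVALID, so a second kfree(p) trips the first test.
 * 2) Stale pointer: p's memory was freed and reallocated under a new
 *    random tag; freeing through the old p has tag != shadow_byte and
 *    trips the second test.
 */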
 #define DEFINE_HWASAN_LOAD_STORE(size)                                 \
        void __hwasan_load##size##_noabort(unsigned long addr)          \
        {                                                               \
@@ -158,7 +159,7 @@ EXPORT_SYMBOL(__hwasan_storeN_noabort);
 
 void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size)
 {
-       kasan_poison_shadow((void *)addr, size, tag);
+       poison_range((void *)addr, size, tag);
 }
 EXPORT_SYMBOL(__hwasan_tag_memory);
 
@@ -168,7 +169,9 @@ void kasan_set_free_info(struct kmem_cache *cache,
        struct kasan_alloc_meta *alloc_meta;
        u8 idx = 0;
 
-       alloc_meta = get_alloc_info(cache, object);
+       alloc_meta = kasan_get_alloc_meta(cache, object);
+       if (!alloc_meta)
+               return;
 
 #ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
        idx = alloc_meta->free_track_idx;
@@ -185,7 +188,9 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
        struct kasan_alloc_meta *alloc_meta;
        int i = 0;
 
-       alloc_meta = get_alloc_info(cache, object);
+       alloc_meta = kasan_get_alloc_meta(cache, object);
+       if (!alloc_meta)
+               return NULL;
 
 #ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
        for (i = 0; i < KASAN_NR_FREE_STACKS; i++) {
index e3c7ca7..605f671 100644 (file)
@@ -1343,46 +1343,6 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
 #endif
 
 /**
- * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
- * @page: the page
- * @pgdat: pgdat of the page
- *
- * This function relies on page's memcg being stable - see the
- * access rules in commit_charge().
- */
-struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat)
-{
-       struct mem_cgroup_per_node *mz;
-       struct mem_cgroup *memcg;
-       struct lruvec *lruvec;
-
-       if (mem_cgroup_disabled()) {
-               lruvec = &pgdat->__lruvec;
-               goto out;
-       }
-
-       memcg = page_memcg(page);
-       /*
-        * Swapcache readahead pages are added to the LRU - and
-        * possibly migrated - before they are charged.
-        */
-       if (!memcg)
-               memcg = root_mem_cgroup;
-
-       mz = mem_cgroup_page_nodeinfo(memcg, page);
-       lruvec = &mz->lruvec;
-out:
-       /*
-        * Since a node can be onlined after the mem_cgroup was created,
-        * we have to be prepared to initialize lruvec->zone here;
-        * and if offlined then reonlined, we need to reinitialize it.
-        */
-       if (unlikely(lruvec->pgdat != pgdat))
-               lruvec->pgdat = pgdat;
-       return lruvec;
-}
-
-/**
  * lock_page_lruvec - lock and return lruvec for a given page.
  * @page: the page
  *
@@ -6987,6 +6947,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
                return;
 
        memcg = page_memcg(oldpage);
+       VM_WARN_ON_ONCE_PAGE(!memcg, oldpage);
        if (!memcg)
                return;
 
@@ -7178,12 +7139,15 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
        VM_BUG_ON_PAGE(PageLRU(page), page);
        VM_BUG_ON_PAGE(page_count(page), page);
 
+       if (mem_cgroup_disabled())
+               return;
+
        if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
                return;
 
        memcg = page_memcg(page);
 
-       /* Readahead page, never charged */
+       VM_WARN_ON_ONCE_PAGE(!memcg, page);
        if (!memcg)
                return;
 
@@ -7242,12 +7206,15 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
        struct mem_cgroup *memcg;
        unsigned short oldid;
 
+       if (mem_cgroup_disabled())
+               return 0;
+
        if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
                return 0;
 
        memcg = page_memcg(page);
 
-       /* Readahead page, never charged */
+       VM_WARN_ON_ONCE_PAGE(!memcg, page);
        if (!memcg)
                return 0;
 
index c016042..af41fb9 100644 (file)
@@ -1784,39 +1784,112 @@ int remove_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(remove_memory);
 
+static int try_offline_memory_block(struct memory_block *mem, void *arg)
+{
+       uint8_t online_type = MMOP_ONLINE_KERNEL;
+       uint8_t **online_types = arg;
+       struct page *page;
+       int rc;
+
+       /*
+        * Sense the online_type via the zone of the memory block. Offlining
+        * with multiple zones within one memory block will be rejected
+        * by the offlining code ... so we don't care about that.
+        */
+       page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr));
+       if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE)
+               online_type = MMOP_ONLINE_MOVABLE;
+
+       rc = device_offline(&mem->dev);
+       /*
+        * Default is MMOP_OFFLINE - change it only if offlining succeeded,
+        * so try_reonline_memory_block() can do the right thing.
+        */
+       if (!rc)
+               **online_types = online_type;
+
+       (*online_types)++;
+       /* Ignore if already offline. */
+       return rc < 0 ? rc : 0;
+}
+
+static int try_reonline_memory_block(struct memory_block *mem, void *arg)
+{
+       uint8_t **online_types = arg;
+       int rc;
+
+       if (**online_types != MMOP_OFFLINE) {
+               mem->online_type = **online_types;
+               rc = device_online(&mem->dev);
+               if (rc < 0)
+                       pr_warn("%s: Failed to re-online memory: %d",
+                               __func__, rc);
+       }
+
+       /* Continue processing all remaining memory blocks. */
+       (*online_types)++;
+       return 0;
+}
+
 /*
- * Try to offline and remove a memory block. Might take a long time to
- * finish in case memory is still in use. Primarily useful for memory devices
- * that logically unplugged all memory (so it's no longer in use) and want to
- * offline + remove the memory block.
+ * Try to offline and remove memory. Might take a long time to finish in case
+ * memory is still in use. Primarily useful for memory devices that logically
+ * unplugged all memory (so it's no longer in use) and want to offline + remove
+ * that memory.
  */
 int offline_and_remove_memory(int nid, u64 start, u64 size)
 {
-       struct memory_block *mem;
-       int rc = -EINVAL;
+       const unsigned long mb_count = size / memory_block_size_bytes();
+       uint8_t *online_types, *tmp;
+       int rc;
 
        if (!IS_ALIGNED(start, memory_block_size_bytes()) ||
-           size != memory_block_size_bytes())
-               return rc;
+           !IS_ALIGNED(size, memory_block_size_bytes()) || !size)
+               return -EINVAL;
+
+       /*
+        * We'll remember the old online type of each memory block, so we can
+        * try to revert whatever we did when offlining one memory block fails
+        * after some other blocks were already offlined successfully.
+        */
+       online_types = kmalloc_array(mb_count, sizeof(*online_types),
+                                    GFP_KERNEL);
+       if (!online_types)
+               return -ENOMEM;
+       /*
+        * Initialize all states to MMOP_OFFLINE, so when we abort processing in
+        * try_offline_memory_block(), we'll skip all unprocessed blocks in
+        * try_reonline_memory_block().
+        */
+       memset(online_types, MMOP_OFFLINE, mb_count);
 
        lock_device_hotplug();
-       mem = find_memory_block(__pfn_to_section(PFN_DOWN(start)));
-       if (mem)
-               rc = device_offline(&mem->dev);
-       /* Ignore if the device is already offline. */
-       if (rc > 0)
-               rc = 0;
+
+       tmp = online_types;
+       rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block);
 
        /*
-        * In case we succeeded to offline the memory block, remove it.
+        * If we succeeded in offlining all memory, remove it.
         * This cannot fail as it cannot get onlined in the meantime.
         */
        if (!rc) {
                rc = try_remove_memory(nid, start, size);
-               WARN_ON_ONCE(rc);
+               if (rc)
+                       pr_err("%s: Failed to remove memory: %d", __func__, rc);
+       }
+
+       /*
+        * Roll back what we did. While memory onlining might theoretically fail
+        * (nacked by a notifier), it barely ever happens.
+        */
+       if (rc) {
+               tmp = online_types;
+               walk_memory_blocks(start, size, &tmp,
+                                  try_reonline_memory_block);
        }
        unlock_device_hotplug();
 
+       kfree(online_types);
        return rc;
 }
 EXPORT_SYMBOL_GPL(offline_and_remove_memory);
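
The intended caller is a paravirtualized memory device such as
virtio-mem; a hypothetical invocation spanning several blocks:

static int unplug_four_blocks(int nid, u64 start)
{
	/*
	 * If any block fails to offline, the rollback above re-onlines
	 * the blocks that were already offlined before returning.
	 */
	return offline_and_remove_memory(nid, start,
					 4 * memory_block_size_bytes());
}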
index f473cdd..624ed51 100644 (file)
@@ -104,7 +104,7 @@ static inline void poison_element(mempool_t *pool, void *element)
 static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
 {
        if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
-               kasan_poison_kfree(element, _RET_IP_);
+               kasan_slab_free_mempool(element, _RET_IP_);
        else if (pool->alloc == mempool_alloc_pages)
                kasan_free_pages(element, (unsigned long)pool->pool_data);
 }
@@ -112,7 +112,7 @@ static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
 static void kasan_unpoison_element(mempool_t *pool, void *element)
 {
        if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
-               kasan_unpoison_slab(element);
+               kasan_unpoison_range(element, __ksize(element));
        else if (pool->alloc == mempool_alloc_pages)
                kasan_alloc_pages(element, (unsigned long)pool->pool_data);
 }
index 10598e5..dc72060 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1897,8 +1897,8 @@ out:
        return addr;
 
 unmap_and_free_vma:
+       fput(vma->vm_file);
        vma->vm_file = NULL;
-       fput(file);
 
        /* Undo any partial mapping done by a device driver. */
        unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
index 3beeb8d..7a2c89b 100644 (file)
@@ -1204,8 +1204,10 @@ static void kernel_init_free_pages(struct page *page, int numpages)
 
        /* s390's use of memset() could overwrite KASAN redzones. */
        kasan_disable_current();
-       for (i = 0; i < numpages; i++)
+       for (i = 0; i < numpages; i++) {
+               page_kasan_tag_reset(page + i);
                clear_highpage(page + i);
+       }
        kasan_enable_current();
 }
 
@@ -7671,6 +7673,11 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
                 * alias for the memset().
                 */
                direct_map_addr = page_address(page);
+               /*
+                * Perform a kasan-unchecked memset() since this memory
+                * has not been initialized.
+                */
+               direct_map_addr = kasan_reset_tag(direct_map_addr);
                if ((unsigned int)poison <= 0xFF)
                        memset(direct_map_addr, poison, PAGE_SIZE);
 
index 06ec518..65cdf84 100644 (file)
@@ -25,7 +25,7 @@ static void poison_page(struct page *page)
 
        /* KASAN still thinks the page is in-use, so skip it. */
        kasan_disable_current();
-       memset(addr, PAGE_POISON, PAGE_SIZE);
+       memset(kasan_reset_tag(addr), PAGE_POISON, PAGE_SIZE);
        kasan_enable_current();
        kunmap_atomic(addr);
 }
index ba88ec4..4354c14 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/ptdump.h>
 #include <linux/kasan.h>
 
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 /*
  * This is an optimization for KASAN=y case. Since all kasan page tables
  * eventually point to the kasan_early_shadow_page we could call note_page()
@@ -31,7 +31,8 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
        struct ptdump_state *st = walk->private;
        pgd_t val = READ_ONCE(*pgd);
 
-#if CONFIG_PGTABLE_LEVELS > 4 && defined(CONFIG_KASAN)
+#if CONFIG_PGTABLE_LEVELS > 4 && \
+               (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
        if (pgd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_p4d)))
                return note_kasan_page_table(walk, addr);
 #endif
@@ -51,7 +52,8 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
        struct ptdump_state *st = walk->private;
        p4d_t val = READ_ONCE(*p4d);
 
-#if CONFIG_PGTABLE_LEVELS > 3 && defined(CONFIG_KASAN)
+#if CONFIG_PGTABLE_LEVELS > 3 && \
+               (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
        if (p4d_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pud)))
                return note_kasan_page_table(walk, addr);
 #endif
@@ -71,7 +73,8 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
        struct ptdump_state *st = walk->private;
        pud_t val = READ_ONCE(*pud);
 
-#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_KASAN)
+#if CONFIG_PGTABLE_LEVELS > 2 && \
+               (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
        if (pud_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pmd)))
                return note_kasan_page_table(walk, addr);
 #endif
@@ -91,7 +94,7 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
        struct ptdump_state *st = walk->private;
        pmd_t val = READ_ONCE(*pmd);
 
-#if defined(CONFIG_KASAN)
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
        if (pmd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pte)))
                return note_kasan_page_table(walk, addr);
 #endif
index 2f2b55c..e981c80 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/debugfs.h>
+#include <linux/kasan.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
@@ -53,7 +54,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
  */
 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
                SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
-               SLAB_FAILSLAB | SLAB_KASAN)
+               SLAB_FAILSLAB | kasan_never_merge())
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
                         SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
@@ -1176,7 +1177,7 @@ size_t ksize(const void *objp)
         * We assume that ksize callers could use the whole allocated area,
         * so we need to unpoison this area.
         */
-       kasan_unpoison_shadow(objp, size);
+       kasan_unpoison_range(objp, size);
        return size;
 }
 EXPORT_SYMBOL(ksize);
index 4552319..0c8b43a 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -249,7 +249,7 @@ static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
 {
 #ifdef CONFIG_SLAB_FREELIST_HARDENED
        /*
-        * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might be tagged.
+        * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged.
         * Normally, this doesn't cause any issues, as both set_freepointer()
         * and get_freepointer() are called with a pointer with the same tag.
         * However, there are some issues with CONFIG_SLUB_DEBUG code. For
@@ -275,6 +275,7 @@ static inline void *freelist_dereference(const struct kmem_cache *s,
 
 static inline void *get_freepointer(struct kmem_cache *s, void *object)
 {
+       object = kasan_reset_tag(object);
        return freelist_dereference(s, object + s->offset);
 }
 
@@ -304,6 +305,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
        BUG_ON(object == fp); /* naive detection of double free or corruption */
 #endif
 
+       freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
        *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
 }
 
@@ -538,8 +540,8 @@ static void print_section(char *level, char *text, u8 *addr,
                          unsigned int length)
 {
        metadata_access_enable();
-       print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
-                       length, 1);
+       print_hex_dump(level, kasan_reset_tag(text), DUMP_PREFIX_ADDRESS,
+                       16, 1, addr, length, 1);
        metadata_access_disable();
 }
 
@@ -570,7 +572,7 @@ static struct track *get_track(struct kmem_cache *s, void *object,
 
        p = object + get_info_end(s);
 
-       return p + alloc;
+       return kasan_reset_tag(p + alloc);
 }
 
 static void set_track(struct kmem_cache *s, void *object,
@@ -583,7 +585,8 @@ static void set_track(struct kmem_cache *s, void *object,
                unsigned int nr_entries;
 
                metadata_access_enable();
-               nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3);
+               nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
+                                             TRACK_ADDRS_COUNT, 3);
                metadata_access_disable();
 
                if (nr_entries < TRACK_ADDRS_COUNT)
@@ -747,7 +750,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
 
 static void init_object(struct kmem_cache *s, void *object, u8 val)
 {
-       u8 *p = object;
+       u8 *p = kasan_reset_tag(object);
 
        if (s->flags & SLAB_RED_ZONE)
                memset(p - s->red_left_pad, val, s->red_left_pad);
@@ -777,7 +780,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
        u8 *addr = page_address(page);
 
        metadata_access_enable();
-       fault = memchr_inv(start, value, bytes);
+       fault = memchr_inv(kasan_reset_tag(start), value, bytes);
        metadata_access_disable();
        if (!fault)
                return 1;
@@ -873,7 +876,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
 
        pad = end - remainder;
        metadata_access_enable();
-       fault = memchr_inv(pad, POISON_INUSE, remainder);
+       fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
        metadata_access_disable();
        if (!fault)
                return 1;
@@ -1118,7 +1121,7 @@ void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
                return;
 
        metadata_access_enable();
-       memset(addr, POISON_INUSE, page_size(page));
+       memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
        metadata_access_disable();
 }
 
@@ -1566,10 +1569,10 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
                         * Clear the object and the metadata, but don't touch
                         * the redzone.
                         */
-                       memset(object, 0, s->object_size);
+                       memset(kasan_reset_tag(object), 0, s->object_size);
                        rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
                                                           : 0;
-                       memset((char *)object + s->inuse, 0,
+                       memset((char *)kasan_reset_tag(object) + s->inuse, 0,
                               s->size - s->inuse - rsize);
 
                }
@@ -2881,10 +2884,10 @@ redo:
                stat(s, ALLOC_FASTPATH);
        }
 
-       maybe_wipe_obj_freeptr(s, object);
+       maybe_wipe_obj_freeptr(s, kasan_reset_tag(object));
 
        if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
-               memset(object, 0, s->object_size);
+               memset(kasan_reset_tag(object), 0, s->object_size);
 
        slab_post_alloc_hook(s, objcg, gfpflags, 1, &object);
 
index 4ddb6e1..8c9b7d1 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -311,6 +311,18 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
        return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
 }
 
+/*
+ * Change backing file, only valid to use during initial VMA setup.
+ */
+void vma_set_file(struct vm_area_struct *vma, struct file *file)
+{
+       /* Changing an anonymous vma with this is illegal */
+       get_file(file);
+       swap(vma->vm_file, file);
+       fput(file);
+}
+EXPORT_SYMBOL(vma_set_file);
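
A hypothetical driver-side use during mmap setup (the helper name and
the backing file are illustrative):

static int my_mmap_setup(struct vm_area_struct *vma, struct file *backing)
{
	/*
	 * Swap in the real backing file: vma_set_file() takes its own
	 * reference on 'backing' and drops the VMA's old reference.
	 */
	vma_set_file(vma, backing);
	return 0;
}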
+
 #ifndef STACK_RND_MASK
 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
 #endif
index 785a7bb..4f62f29 100644 (file)
@@ -903,6 +903,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
        fid->clnt = clnt;
        fid->rdir = NULL;
        fid->fid = 0;
+       refcount_set(&fid->count, 1);
 
        idr_preload(GFP_KERNEL);
        spin_lock_irq(&clnt->lock);
@@ -910,7 +911,6 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
                            GFP_NOWAIT);
        spin_unlock_irq(&clnt->lock);
        idr_preload_end();
-
        if (!ret)
                return fid;
 
@@ -1189,7 +1189,6 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
 
        p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %u wname[0] %s\n",
                 oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL);
-
        req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,
                                                                nwname, wnames);
        if (IS_ERR(req)) {
@@ -1221,7 +1220,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
        if (nwname)
                memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid));
        else
-               fid->qid = oldfid->qid;
+               memmove(&fid->qid, &oldfid->qid, sizeof(struct p9_qid));
 
        kfree(wqids);
        return fid;
@@ -1274,6 +1273,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
                p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN",  qid.type,
                (unsigned long long)qid.path, qid.version, iounit);
 
+       memmove(&fid->qid, &qid, sizeof(struct p9_qid));
        fid->mode = mode;
        fid->iounit = iounit;
 
@@ -1319,6 +1319,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32
                        (unsigned long long)qid->path,
                        qid->version, iounit);
 
+       memmove(&ofid->qid, qid, sizeof(struct p9_qid));
        ofid->mode = mode;
        ofid->iounit = iounit;
 
@@ -1364,6 +1365,7 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
                                (unsigned long long)qid.path,
                                qid.version, iounit);
 
+       memmove(&fid->qid, &qid, sizeof(struct p9_qid));
        fid->mode = mode;
        fid->iounit = iounit;
 
@@ -1460,12 +1462,14 @@ int p9_client_clunk(struct p9_fid *fid)
        struct p9_req_t *req;
        int retries = 0;
 
-       if (!fid) {
-               pr_warn("%s (%d): Trying to clunk with NULL fid\n",
+       if (!fid || IS_ERR(fid)) {
+               pr_warn("%s (%d): Trying to clunk with invalid fid\n",
                        __func__, task_pid_nr(current));
                dump_stack();
                return 0;
        }
+       if (!refcount_dec_and_test(&fid->count))
+               return 0;
 
 again:
        p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid,
index 6baee12..d538255 100644 (file)
@@ -61,7 +61,6 @@ endif
 ifneq ($(findstring 2, $(KBUILD_EXTRA_WARN)),)
 
 KBUILD_CFLAGS += -Wdisabled-optimization
-KBUILD_CFLAGS += -Wnested-externs
 KBUILD_CFLAGS += -Wshadow
 KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
 KBUILD_CFLAGS += -Wmissing-field-initializers
index 9413370..213677a 100644 (file)
@@ -148,10 +148,12 @@ endif
 # we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE)
 #
 ifeq ($(CONFIG_KASAN),y)
+ifneq ($(CONFIG_KASAN_HW_TAGS),y)
 _c_flags += $(if $(patsubst n%,, \
                $(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \
                $(CFLAGS_KASAN), $(CFLAGS_KASAN_NOSANITIZE))
 endif
+endif
 
 ifeq ($(CONFIG_UBSAN),y)
 _c_flags += $(if $(patsubst n%,, \
index d7ca46c..652e954 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 #
 # Copyright 2004 Matt Mackall <mpm@selenic.com>
 #
index eee5b7f..8c8d7c3 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 # SPDX-License-Identifier: GPL-2.0
 # Manipulate options in a .config file from the command line
 
index 89abf77..627eba5 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0
 #
 # diffconfig - a tool to compare .config files.
index 057c6ca..b85e097 100644 (file)
@@ -32,6 +32,9 @@ static struct resword {
        { "restrict", RESTRICT_KEYW },
        { "asm", ASM_KEYW },
 
+       // C11 keywords that can be used at module scope
+       { "_Static_assert", STATIC_ASSERT_KEYW },
+
        // attribute commented out in modutils 2.4.2.  People are using 'attribute' as a
        // field name which breaks the genksyms parser.  It is not a gcc keyword anyway.
        // KAO. },
index e265c5d..ae76472 100644 (file)
@@ -118,7 +118,7 @@ yylex(void)
 {
   static enum {
     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
-    ST_BRACKET, ST_BRACE, ST_EXPRESSION,
+    ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
     ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
     ST_TABLE_5, ST_TABLE_6
   } lexstate = ST_NOTSTARTED;
@@ -201,6 +201,11 @@ repeat:
 
                  case EXPORT_SYMBOL_KEYW:
                      goto fini;
+
+                 case STATIC_ASSERT_KEYW:
+                   lexstate = ST_STATIC_ASSERT;
+                   count = 0;
+                   goto repeat;
                  }
              }
            if (!suppress_type_lookup)
@@ -401,6 +406,26 @@ repeat:
        }
       break;
 
+    case ST_STATIC_ASSERT:
+      APP;
+      switch (token)
+       {
+       case '(':
+         ++count;
+         goto repeat;
+       case ')':
+         if (--count == 0)
+           {
+             lexstate = ST_NORMAL;
+             token = STATIC_ASSERT_PHRASE;
+             break;
+           }
+         goto repeat;
+       default:
+         goto repeat;
+       }
+      break;
+
     case ST_TABLE_1:
       goto repeat;
 
index e22b422..8e9b5e6 100644 (file)
@@ -80,6 +80,7 @@ static void record_compound(struct string_list **keyw,
 %token SHORT_KEYW
 %token SIGNED_KEYW
 %token STATIC_KEYW
+%token STATIC_ASSERT_KEYW
 %token STRUCT_KEYW
 %token TYPEDEF_KEYW
 %token UNION_KEYW
@@ -97,6 +98,7 @@ static void record_compound(struct string_list **keyw,
 %token BRACE_PHRASE
 %token BRACKET_PHRASE
 %token EXPRESSION_PHRASE
+%token STATIC_ASSERT_PHRASE
 
 %token CHAR
 %token DOTS
@@ -130,6 +132,7 @@ declaration1:
        | function_definition
        | asm_definition
        | export_definition
+       | static_assert
        | error ';'                             { $$ = $2; }
        | error '}'                             { $$ = $2; }
        ;
@@ -493,6 +496,10 @@ export_definition:
                { export_symbol((*$3)->string); $$ = $5; }
        ;
 
+/* Ignore any module-scoped _Static_assert(...) */
+static_assert:
+       STATIC_ASSERT_PHRASE ';'                        { $$ = $2; }
+       ;
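
What the new token and phrase let genksyms skip over is a file-scope
assertion such as this (hypothetical example):

_Static_assert(sizeof(long) >= 4, "long must be at least 32 bits");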
 
 %%
 
index 68dab82..92d9aa6 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0
 
 use strict;
index f6e548b..db03e2f 100644 (file)
@@ -11,7 +11,6 @@
 #include <time.h>
 #include <unistd.h>
 #include <getopt.h>
-#include <sys/stat.h>
 #include <sys/time.h>
 #include <errno.h>
 
index a39d93e..2568dbe 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <sys/mman.h>
 #include <sys/stat.h>
+#include <sys/types.h>
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
index 240109f..9c22cb5 100644 (file)
@@ -12,7 +12,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 
 #include "lkc.h"
 #include "parser.tab.h"
index 8454649..bee2413 100644 (file)
@@ -6,6 +6,10 @@
 #ifndef LKC_H
 #define LKC_H
 
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
 #include "expr.h"
 
 #ifdef __cplusplus
index 0243086..0590f86 100644 (file)
@@ -114,7 +114,7 @@ static char *do_error_if(int argc, char *argv[])
        if (!strcmp(argv[0], "y"))
                pperror("%s", argv[1]);
 
-       return NULL;
+       return xstrdup("");
 }
 
 static char *do_filename(int argc, char *argv[])
index 02ccc0a..fa564cd 100755 (executable)
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
 PKG="Qt5Core Qt5Gui Qt5Widgets"
-PKG2="QtCore QtGui"
 
 if [ -z "$(command -v pkg-config)" ]; then
        echo >&2 "*"
@@ -12,21 +11,14 @@ if [ -z "$(command -v pkg-config)" ]; then
 fi
 
 if pkg-config --exists $PKG; then
-       echo cflags=\"-std=c++11 -fPIC $(pkg-config --cflags Qt5Core Qt5Gui Qt5Widgets)\"
+       echo cflags=\"-std=c++11 -fPIC $(pkg-config --cflags $PKG)\"
        echo libs=\"$(pkg-config --libs $PKG)\"
        echo moc=\"$(pkg-config --variable=host_bins Qt5Core)/moc\"
        exit 0
 fi
 
-if pkg-config --exists $PKG2; then
-       echo cflags=\"$(pkg-config --cflags $PKG2)\"
-       echo libs=\"$(pkg-config --libs $PKG2)\"
-       echo moc=\"$(pkg-config --variable=moc_location QtCore)\"
-       exit 0
-fi
-
 echo >&2 "*"
-echo >&2 "* Could not find Qt via pkg-config."
-echo >&2 "* Please install either Qt 4.8 or 5.x. and make sure it's in PKG_CONFIG_PATH"
+echo >&2 "* Could not find Qt5 via pkg-config."
+echo >&2 "* Please install Qt5 and make sure it's in PKG_CONFIG_PATH"
 echo >&2 "*"
 exit 1
index f7eb093..d000869 100644 (file)
@@ -310,15 +310,16 @@ ConfigList::ConfigList(QWidget *parent, const char *name)
 
        setHeaderLabels(QStringList() << "Option" << "Name" << "Value");
 
-       connect(this, SIGNAL(itemSelectionChanged(void)),
-               SLOT(updateSelection(void)));
+       connect(this, &ConfigList::itemSelectionChanged,
+               this, &ConfigList::updateSelection);
 
        if (name) {
                configSettings->beginGroup(name);
                showName = configSettings->value("/showName", false).toBool();
                optMode = (enum optionMode)configSettings->value("/optionMode", 0).toInt();
                configSettings->endGroup();
-               connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings()));
+               connect(configApp, &QApplication::aboutToQuit,
+                       this, &ConfigList::saveSettings);
        }
 
        showColumn(promptColIdx);
@@ -888,10 +889,10 @@ void ConfigList::contextMenuEvent(QContextMenuEvent *e)
                headerPopup = new QMenu(this);
                action = new QAction("Show Name", this);
                action->setCheckable(true);
-               connect(action, SIGNAL(toggled(bool)),
-                       SLOT(setShowName(bool)));
-               connect(this, SIGNAL(showNameChanged(bool)),
-                       action, SLOT(setChecked(bool)));
+               connect(action, &QAction::toggled,
+                       this, &ConfigList::setShowName);
+               connect(this, &ConfigList::showNameChanged,
+                       action, &QAction::setChecked);
                action->setChecked(showName);
                headerPopup->addAction(action);
        }
@@ -936,15 +937,18 @@ ConfigInfoView::ConfigInfoView(QWidget* parent, const char *name)
                configSettings->beginGroup(objectName());
                setShowDebug(configSettings->value("/showDebug", false).toBool());
                configSettings->endGroup();
-               connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings()));
+               connect(configApp, &QApplication::aboutToQuit,
+                       this, &ConfigInfoView::saveSettings);
        }
 
        contextMenu = createStandardContextMenu();
        QAction *action = new QAction("Show Debug Info", contextMenu);
 
        action->setCheckable(true);
-       connect(action, SIGNAL(toggled(bool)), SLOT(setShowDebug(bool)));
-       connect(this, SIGNAL(showDebugChanged(bool)), action, SLOT(setChecked(bool)));
+       connect(action, &QAction::toggled,
+               this, &ConfigInfoView::setShowDebug);
+       connect(this, &ConfigInfoView::showDebugChanged,
+               action, &QAction::setChecked);
        action->setChecked(showDebug());
        contextMenu->addSeparator();
        contextMenu->addAction(action);
@@ -1231,11 +1235,13 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow *parent)
        layout2->setSpacing(6);
        layout2->addWidget(new QLabel("Find:", this));
        editField = new QLineEdit(this);
-       connect(editField, SIGNAL(returnPressed()), SLOT(search()));
+       connect(editField, &QLineEdit::returnPressed,
+               this, &ConfigSearchWindow::search);
        layout2->addWidget(editField);
        searchButton = new QPushButton("Search", this);
        searchButton->setAutoDefault(false);
-       connect(searchButton, SIGNAL(clicked()), SLOT(search()));
+       connect(searchButton, &QPushButton::clicked,
+               this, &ConfigSearchWindow::search);
        layout2->addWidget(searchButton);
        layout1->addLayout(layout2);
 
@@ -1244,10 +1250,10 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow *parent)
        list = new ConfigList(split, "search");
        list->mode = listMode;
        info = new ConfigInfoView(split, "search");
-       connect(list, SIGNAL(menuChanged(struct menu *)),
-               info, SLOT(setInfo(struct menu *)));
-       connect(list, SIGNAL(menuChanged(struct menu *)),
-               parent, SLOT(setMenuLink(struct menu *)));
+       connect(list, &ConfigList::menuChanged,
+               info, &ConfigInfoView::setInfo);
+       connect(list, &ConfigList::menuChanged,
+               parent, &ConfigMainWindow::setMenuLink);
 
        layout1->addWidget(split);
 
@@ -1267,7 +1273,8 @@ ConfigSearchWindow::ConfigSearchWindow(ConfigMainWindow *parent)
        if (ok)
                split->setSizes(sizes);
        configSettings->endGroup();
-       connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings()));
+       connect(configApp, &QApplication::aboutToQuit,
+               this, &ConfigSearchWindow::saveSettings);
 }
 
 void ConfigSearchWindow::saveSettings(void)
@@ -1367,19 +1374,23 @@ ConfigMainWindow::ConfigMainWindow(void)
        configList->setFocus();
 
        backAction = new QAction(QPixmap(xpm_back), "Back", this);
-       connect(backAction, SIGNAL(triggered(bool)), SLOT(goBack()));
+       connect(backAction, &QAction::triggered,
+               this, &ConfigMainWindow::goBack);
 
        QAction *quitAction = new QAction("&Quit", this);
        quitAction->setShortcut(Qt::CTRL + Qt::Key_Q);
-       connect(quitAction, SIGNAL(triggered(bool)), SLOT(close()));
+       connect(quitAction, &QAction::triggered,
+               this, &ConfigMainWindow::close);
 
        QAction *loadAction = new QAction(QPixmap(xpm_load), "&Load", this);
        loadAction->setShortcut(Qt::CTRL + Qt::Key_L);
-       connect(loadAction, SIGNAL(triggered(bool)), SLOT(loadConfig()));
+       connect(loadAction, &QAction::triggered,
+               this, &ConfigMainWindow::loadConfig);
 
        saveAction = new QAction(QPixmap(xpm_save), "&Save", this);
        saveAction->setShortcut(Qt::CTRL + Qt::Key_S);
-       connect(saveAction, SIGNAL(triggered(bool)), SLOT(saveConfig()));
+       connect(saveAction, &QAction::triggered,
+               this, &ConfigMainWindow::saveConfig);
 
        conf_set_changed_callback(conf_changed);
 
@@ -1388,31 +1399,37 @@ ConfigMainWindow::ConfigMainWindow(void)
        configname = xstrdup(conf_get_configname());
 
        QAction *saveAsAction = new QAction("Save &As...", this);
-         connect(saveAsAction, SIGNAL(triggered(bool)), SLOT(saveConfigAs()));
+       connect(saveAsAction, &QAction::triggered,
+               this, &ConfigMainWindow::saveConfigAs);
        QAction *searchAction = new QAction("&Find", this);
        searchAction->setShortcut(Qt::CTRL + Qt::Key_F);
-         connect(searchAction, SIGNAL(triggered(bool)), SLOT(searchConfig()));
+       connect(searchAction, &QAction::triggered,
+               this, &ConfigMainWindow::searchConfig);
        singleViewAction = new QAction(QPixmap(xpm_single_view), "Single View", this);
        singleViewAction->setCheckable(true);
-         connect(singleViewAction, SIGNAL(triggered(bool)), SLOT(showSingleView()));
+       connect(singleViewAction, &QAction::triggered,
+               this, &ConfigMainWindow::showSingleView);
        splitViewAction = new QAction(QPixmap(xpm_split_view), "Split View", this);
        splitViewAction->setCheckable(true);
-         connect(splitViewAction, SIGNAL(triggered(bool)), SLOT(showSplitView()));
+       connect(splitViewAction, &QAction::triggered,
+               this, &ConfigMainWindow::showSplitView);
        fullViewAction = new QAction(QPixmap(xpm_tree_view), "Full View", this);
        fullViewAction->setCheckable(true);
-         connect(fullViewAction, SIGNAL(triggered(bool)), SLOT(showFullView()));
+       connect(fullViewAction, &QAction::triggered,
+               this, &ConfigMainWindow::showFullView);
 
        QAction *showNameAction = new QAction("Show Name", this);
          showNameAction->setCheckable(true);
-       connect(showNameAction, SIGNAL(toggled(bool)), configList, SLOT(setShowName(bool)));
+       connect(showNameAction, &QAction::toggled,
+               configList, &ConfigList::setShowName);
        showNameAction->setChecked(configList->showName);
 
        QActionGroup *optGroup = new QActionGroup(this);
        optGroup->setExclusive(true);
-       connect(optGroup, SIGNAL(triggered(QAction*)), configList,
-               SLOT(setOptionMode(QAction *)));
-       connect(optGroup, SIGNAL(triggered(QAction *)), menuList,
-               SLOT(setOptionMode(QAction *)));
+       connect(optGroup, &QActionGroup::triggered,
+               configList, &ConfigList::setOptionMode);
+       connect(optGroup, &QActionGroup::triggered,
+               menuList, &ConfigList::setOptionMode);
 
        ConfigList::showNormalAction = new QAction("Show Normal Options", optGroup);
        ConfigList::showNormalAction->setCheckable(true);
@@ -1423,13 +1440,16 @@ ConfigMainWindow::ConfigMainWindow(void)
 
        QAction *showDebugAction = new QAction("Show Debug Info", this);
          showDebugAction->setCheckable(true);
-         connect(showDebugAction, SIGNAL(toggled(bool)), helpText, SLOT(setShowDebug(bool)));
+       connect(showDebugAction, &QAction::toggled,
+               helpText, &ConfigInfoView::setShowDebug);
          showDebugAction->setChecked(helpText->showDebug());
 
        QAction *showIntroAction = new QAction("Introduction", this);
-         connect(showIntroAction, SIGNAL(triggered(bool)), SLOT(showIntro()));
+       connect(showIntroAction, &QAction::triggered,
+               this, &ConfigMainWindow::showIntro);
        QAction *showAboutAction = new QAction("About", this);
-         connect(showAboutAction, SIGNAL(triggered(bool)), SLOT(showAbout()));
+       connect(showAboutAction, &QAction::triggered,
+               this, &ConfigMainWindow::showAbout);
 
        // init tool bar
        QToolBar *toolBar = addToolBar("Tools");
@@ -1467,30 +1487,30 @@ ConfigMainWindow::ConfigMainWindow(void)
        menu->addAction(showIntroAction);
        menu->addAction(showAboutAction);
 
-       connect (helpText, SIGNAL (anchorClicked (const QUrl &)),
-                helpText, SLOT (clicked (const QUrl &)) );
-
-       connect(configList, SIGNAL(menuChanged(struct menu *)),
-               helpText, SLOT(setInfo(struct menu *)));
-       connect(configList, SIGNAL(menuSelected(struct menu *)),
-               SLOT(changeMenu(struct menu *)));
-       connect(configList, SIGNAL(itemSelected(struct menu *)),
-               SLOT(changeItens(struct menu *)));
-       connect(configList, SIGNAL(parentSelected()),
-               SLOT(goBack()));
-       connect(menuList, SIGNAL(menuChanged(struct menu *)),
-               helpText, SLOT(setInfo(struct menu *)));
-       connect(menuList, SIGNAL(menuSelected(struct menu *)),
-               SLOT(changeMenu(struct menu *)));
-
-       connect(configList, SIGNAL(gotFocus(struct menu *)),
-               helpText, SLOT(setInfo(struct menu *)));
-       connect(menuList, SIGNAL(gotFocus(struct menu *)),
-               helpText, SLOT(setInfo(struct menu *)));
-       connect(menuList, SIGNAL(gotFocus(struct menu *)),
-               SLOT(listFocusChanged(void)));
-       connect(helpText, SIGNAL(menuSelected(struct menu *)),
-               SLOT(setMenuLink(struct menu *)));
+       connect(helpText, &ConfigInfoView::anchorClicked,
+               helpText, &ConfigInfoView::clicked);
+
+       connect(configList, &ConfigList::menuChanged,
+               helpText, &ConfigInfoView::setInfo);
+       connect(configList, &ConfigList::menuSelected,
+               this, &ConfigMainWindow::changeMenu);
+       connect(configList, &ConfigList::itemSelected,
+               this, &ConfigMainWindow::changeItens);
+       connect(configList, &ConfigList::parentSelected,
+               this, &ConfigMainWindow::goBack);
+       connect(menuList, &ConfigList::menuChanged,
+               helpText, &ConfigInfoView::setInfo);
+       connect(menuList, &ConfigList::menuSelected,
+               this, &ConfigMainWindow::changeMenu);
+
+       connect(configList, &ConfigList::gotFocus,
+               helpText, &ConfigInfoView::setInfo);
+       connect(menuList, &ConfigList::gotFocus,
+               helpText, &ConfigInfoView::setInfo);
+       connect(menuList, &ConfigList::gotFocus,
+               this, &ConfigMainWindow::listFocusChanged);
+       connect(helpText, &ConfigInfoView::menuSelected,
+               this, &ConfigMainWindow::setMenuLink);
 
        QString listMode = configSettings->value("/listMode", "symbol").toString();
        if (listMode == "single")
@@ -1779,10 +1799,13 @@ void ConfigMainWindow::showIntro(void)
 void ConfigMainWindow::showAbout(void)
 {
        static const QString str = "qconf is Copyright (C) 2002 Roman Zippel <zippel@linux-m68k.org>.\n"
-               "Copyright (C) 2015 Boris Barbulovski <bbarbulovski@gmail.com>.\n\n"
-               "Bug reports and feature request can also be entered at http://bugzilla.kernel.org/\n";
+               "Copyright (C) 2015 Boris Barbulovski <bbarbulovski@gmail.com>.\n"
+               "\n"
+               "Bug reports and feature requests can also be entered at http://bugzilla.kernel.org/\n"
+               "\n"
+               "Qt Version: ";
 
-       QMessageBox::information(this, "qconf", str);
+       QMessageBox::information(this, "qconf", str + qVersion());
 }
 
 void ConfigMainWindow::saveSettings(void)
index ffa3ec6..fe38e6f 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2002 Roman Zippel <zippel@linux-m68k.org>
  */
 
+#include <sys/types.h>
 #include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
 #include <regex.h>
-#include <sys/utsname.h>
 
 #include "lkc.h"
 
index f882ce0..d6c8165 100644 (file)
@@ -34,12 +34,14 @@ static int external_module = 0;
 static int warn_unresolved = 0;
 /* How a symbol is exported */
 static int sec_mismatch_count = 0;
-static int sec_mismatch_fatal = 0;
+static bool sec_mismatch_warn_only = true;
 /* ignore missing files */
 static int ignore_missing_files;
 /* If set to 1, only warn (instead of error) about missing ns imports */
 static int allow_missing_ns_imports;
 
+static bool error_occurred;
+
 enum export {
        export_plain,      export_unused,     export_gpl,
        export_unused_gpl, export_gpl_future, export_unknown
@@ -78,6 +80,8 @@ modpost_log(enum loglevel loglevel, const char *fmt, ...)
 
        if (loglevel == LOG_FATAL)
                exit(1);
+       if (loglevel == LOG_ERROR)
+               error_occurred = true;
 }
 
 static inline bool strends(const char *str, const char *postfix)
@@ -403,8 +407,8 @@ static void sym_update_namespace(const char *symname, const char *namespace)
         * actually an assertion.
         */
        if (!s) {
-               merror("Could not update namespace(%s) for symbol %s\n",
-                      namespace, symname);
+               error("Could not update namespace(%s) for symbol %s\n",
+                     namespace, symname);
                return;
        }
 
@@ -2014,7 +2018,7 @@ static void read_symbols(const char *modname)
        if (!mod->is_vmlinux) {
                license = get_modinfo(&info, "license");
                if (!license)
-                       warn("missing MODULE_LICENSE() in %s\n", modname);
+                       error("missing MODULE_LICENSE() in %s\n", modname);
                while (license) {
                        if (license_is_gpl_compatible(license))
                                mod->gpl_compatible = 1;
@@ -2141,11 +2145,11 @@ static void check_for_gpl_usage(enum export exp, const char *m, const char *s)
 {
        switch (exp) {
        case export_gpl:
-               fatal("GPL-incompatible module %s.ko uses GPL-only symbol '%s'\n",
+               error("GPL-incompatible module %s.ko uses GPL-only symbol '%s'\n",
                      m, s);
                break;
        case export_unused_gpl:
-               fatal("GPL-incompatible module %s.ko uses GPL-only symbol marked UNUSED '%s'\n",
+               error("GPL-incompatible module %s.ko uses GPL-only symbol marked UNUSED '%s'\n",
                      m, s);
                break;
        case export_gpl_future:
@@ -2174,22 +2178,18 @@ static void check_for_unused(enum export exp, const char *m, const char *s)
        }
 }
 
-static int check_exports(struct module *mod)
+static void check_exports(struct module *mod)
 {
        struct symbol *s, *exp;
-       int err = 0;
 
        for (s = mod->unres; s; s = s->next) {
                const char *basename;
                exp = find_symbol(s->name);
                if (!exp || exp->module == mod) {
-                       if (have_vmlinux && !s->weak) {
+                       if (have_vmlinux && !s->weak)
                                modpost_log(warn_unresolved ? LOG_WARN : LOG_ERROR,
                                            "\"%s\" [%s.ko] undefined!\n",
                                            s->name, mod->name);
-                               if (!warn_unresolved)
-                                       err = 1;
-                       }
                        continue;
                }
                basename = strrchr(mod->name, '/');
@@ -2203,8 +2203,6 @@ static int check_exports(struct module *mod)
                        modpost_log(allow_missing_ns_imports ? LOG_WARN : LOG_ERROR,
                                    "module %s uses symbol %s from namespace %s, but does not import it.\n",
                                    basename, exp->name, exp->namespace);
-                       if (!allow_missing_ns_imports)
-                               err = 1;
                        add_namespace(&mod->missing_namespaces, exp->namespace);
                }
 
@@ -2212,11 +2210,9 @@ static int check_exports(struct module *mod)
                        check_for_gpl_usage(exp->export, basename, exp->name);
                check_for_unused(exp->export, basename, exp->name);
        }
-
-       return err;
 }
 
-static int check_modname_len(struct module *mod)
+static void check_modname_len(struct module *mod)
 {
        const char *mod_name;
 
@@ -2225,12 +2221,8 @@ static int check_modname_len(struct module *mod)
                mod_name = mod->name;
        else
                mod_name++;
-       if (strlen(mod_name) >= MODULE_NAME_LEN) {
-               merror("module name is too long [%s.ko]\n", mod->name);
-               return 1;
-       }
-
-       return 0;
+       if (strlen(mod_name) >= MODULE_NAME_LEN)
+               error("module name is too long [%s.ko]\n", mod->name);
 }
 
 /**
@@ -2289,10 +2281,9 @@ static void add_staging_flag(struct buffer *b, const char *name)
 /**
  * Record CRCs for unresolved symbols
  **/
-static int add_versions(struct buffer *b, struct module *mod)
+static void add_versions(struct buffer *b, struct module *mod)
 {
        struct symbol *s, *exp;
-       int err = 0;
 
        for (s = mod->unres; s; s = s->next) {
                exp = find_symbol(s->name);
@@ -2304,7 +2295,7 @@ static int add_versions(struct buffer *b, struct module *mod)
        }
 
        if (!modversions)
-               return err;
+               return;
 
        buf_printf(b, "\n");
        buf_printf(b, "static const struct modversion_info ____versions[]\n");
@@ -2319,9 +2310,8 @@ static int add_versions(struct buffer *b, struct module *mod)
                        continue;
                }
                if (strlen(s->name) >= MODULE_NAME_LEN) {
-                       merror("too long symbol \"%s\" [%s.ko]\n",
-                              s->name, mod->name);
-                       err = 1;
+                       error("too long symbol \"%s\" [%s.ko]\n",
+                             s->name, mod->name);
                        break;
                }
                buf_printf(b, "\t{ %#8x, \"%s\" },\n",
@@ -2329,8 +2319,6 @@ static int add_versions(struct buffer *b, struct module *mod)
        }
 
        buf_printf(b, "};\n");
-
-       return err;
 }
 
 static void add_depends(struct buffer *b, struct module *mod)
@@ -2554,7 +2542,6 @@ int main(int argc, char **argv)
        char *missing_namespace_deps = NULL;
        char *dump_write = NULL, *files_source = NULL;
        int opt;
-       int err;
        int n;
        struct dump_list *dump_read_start = NULL;
        struct dump_list **dump_read_iter = &dump_read_start;
@@ -2589,7 +2576,7 @@ int main(int argc, char **argv)
                        warn_unresolved = 1;
                        break;
                case 'E':
-                       sec_mismatch_fatal = 1;
+                       sec_mismatch_warn_only = false;
                        break;
                case 'N':
                        allow_missing_ns_imports = 1;
@@ -2624,8 +2611,6 @@ int main(int argc, char **argv)
        if (!have_vmlinux)
                warn("Symbol info of vmlinux is missing. Unresolved symbol check will be entirely skipped.\n");
 
-       err = 0;
-
        for (mod = modules; mod; mod = mod->next) {
                char fname[PATH_MAX];
 
@@ -2634,14 +2619,14 @@ int main(int argc, char **argv)
 
                buf.pos = 0;
 
-               err |= check_modname_len(mod);
-               err |= check_exports(mod);
+               check_modname_len(mod);
+               check_exports(mod);
 
                add_header(&buf, mod);
                add_intree_flag(&buf, !external_module);
                add_retpoline(&buf);
                add_staging_flag(&buf, mod->name);
-               err |= add_versions(&buf, mod);
+               add_versions(&buf, mod);
                add_depends(&buf, mod);
                add_moddevtable(&buf, mod);
                add_srcversion(&buf, mod);
@@ -2655,21 +2640,21 @@ int main(int argc, char **argv)
 
        if (dump_write)
                write_dump(dump_write);
-       if (sec_mismatch_count && sec_mismatch_fatal)
-               fatal("Section mismatches detected.\n"
+       if (sec_mismatch_count && !sec_mismatch_warn_only)
+               error("Section mismatches detected.\n"
                      "Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them.\n");
        for (n = 0; n < SYMBOL_HASH_SIZE; n++) {
                struct symbol *s;
 
                for (s = symbolhash[n]; s; s = s->next) {
                        if (s->is_static)
-                               warn("\"%s\" [%s] is a static %s\n",
-                                    s->name, s->module->name,
-                                    export_str(s->export));
+                               error("\"%s\" [%s] is a static %s\n",
+                                     s->name, s->module->name,
+                                     export_str(s->export));
                }
        }
 
        free(buf.p);
 
-       return err;
+       return error_occurred ? 1 : 0;
 }
index 3aa0527..e6f46ee 100644 (file)
@@ -201,6 +201,19 @@ enum loglevel {
 
 void modpost_log(enum loglevel loglevel, const char *fmt, ...);
 
+/*
+ * warn - show the given message, then let modpost continue running, still
+ *        allowing modpost to exit successfully. This should be used when
+ *        it is still acceptable to generate vmlinux and modules.
+ *
+ * error - show the given message, then let modpost continue running, but fail
+ *         in the end. This should be used when we should stop building vmlinux
+ *         or modules, but we can continue running modpost to catch as many
+ *         issues as possible.
+ *
+ * fatal - show the given message, and bail out immediately. This should be
+ *         used when there is no point in continuing to run modpost.
+ */
 #define warn(fmt, args...)     modpost_log(LOG_WARN, fmt, ##args)
-#define merror(fmt, args...)   modpost_log(LOG_ERROR, fmt, ##args)
+#define error(fmt, args...)    modpost_log(LOG_ERROR, fmt, ##args)
 #define fatal(fmt, args...)    modpost_log(LOG_FATAL, fmt, ##args)
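The block above documents the new three-level scheme. As a quick illustration (a sketch, not part of the patch; the helper and the messages are invented), the levels differ only in what happens after the message is printed:

	/* Assumes the modpost.h macros above. */
	static void example_checks(const char *modname)
	{
		/* printed; build continues; modpost can still exit 0 */
		warn("\"%s.ko\" has a suspicious section\n", modname);

		/* printed; modpost keeps scanning, but exits 1 at the end */
		error("module name is too long [%s.ko]\n", modname);

		/* printed; modpost exits 1 immediately */
		fatal("cannot process %s.ko\n", modname);
	}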
index 2643993..28e67e1 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0-only
 #
 # show_deltas: Read list of printk messages instrumented with
index 40fa692..828a861 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0-or-later
 use strict;
 
index c3db607..96bd99d 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0
 #
 # Author: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
index b657357..74f8aad 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0-only
 
 """
index 764dbe6..75aec71 100644 (file)
@@ -149,8 +149,6 @@ static void release_card_device(struct device *dev)
  *  @extra_size: allocate this extra size after the main soundcard structure
  *  @card_ret: the pointer to store the created card instance
  *
- *  Creates and initializes a soundcard structure.
- *
  *  The function allocates snd_card instance via kzalloc with the given
  *  space for the driver to use freely.  The allocated struct is stored
  *  in the given card_ret pointer.
index 0aeeb62..966bef5 100644 (file)
@@ -77,7 +77,8 @@ static void snd_malloc_dev_iram(struct snd_dma_buffer *dmab, size_t size)
        /* Assign the pool into private_data field */
        dmab->private_data = pool;
 
-       dmab->area = gen_pool_dma_alloc(pool, size, &dmab->addr);
+       dmab->area = gen_pool_dma_alloc_align(pool, size, &dmab->addr,
+                                       PAGE_SIZE);
 }
 
 /**
@@ -132,6 +133,7 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size,
        if (WARN_ON(!dmab))
                return -ENXIO;
 
+       size = PAGE_ALIGN(size);
        dmab->dev.type = type;
        dmab->dev.dev = device;
        dmab->bytes = 0;
index de19174..142fc75 100644 (file)
@@ -693,6 +693,8 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream,
 
        oss_buffer_size = snd_pcm_plug_client_size(substream,
                                                   snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, NULL)) * oss_frame_size;
+       if (!oss_buffer_size)
+               return -EINVAL;
        oss_buffer_size = rounddown_pow_of_two(oss_buffer_size);
        if (atomic_read(&substream->mmap_count)) {
                if (oss_buffer_size > runtime->oss.mmap_bytes)
@@ -728,17 +730,21 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream,
 
        min_period_size = snd_pcm_plug_client_size(substream,
                                                   snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL));
-       min_period_size *= oss_frame_size;
-       min_period_size = roundup_pow_of_two(min_period_size);
-       if (oss_period_size < min_period_size)
-               oss_period_size = min_period_size;
+       if (min_period_size) {
+               min_period_size *= oss_frame_size;
+               min_period_size = roundup_pow_of_two(min_period_size);
+               if (oss_period_size < min_period_size)
+                       oss_period_size = min_period_size;
+       }
 
        max_period_size = snd_pcm_plug_client_size(substream,
                                                   snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL));
-       max_period_size *= oss_frame_size;
-       max_period_size = rounddown_pow_of_two(max_period_size);
-       if (oss_period_size > max_period_size)
-               oss_period_size = max_period_size;
+       if (max_period_size) {
+               max_period_size *= oss_frame_size;
+               max_period_size = rounddown_pow_of_two(max_period_size);
+               if (oss_period_size > max_period_size)
+                       oss_period_size = max_period_size;
+       }
 
        oss_periods = oss_buffer_size / oss_period_size;
 
index 4f03ba8..ee6e9c5 100644 (file)
@@ -89,14 +89,6 @@ static int preallocate_pcm_pages(struct snd_pcm_substream *substream, size_t siz
        return 0;
 }
 
-/*
- * release the preallocated buffer if not yet done.
- */
-static void snd_pcm_lib_preallocate_dma_free(struct snd_pcm_substream *substream)
-{
-       do_free_pages(substream->pcm->card, &substream->dma_buffer);
-}
-
 /**
  * snd_pcm_lib_preallocate_free - release the preallocated buffer of the specified substream.
  * @substream: the pcm substream instance
@@ -105,7 +97,7 @@ static void snd_pcm_lib_preallocate_dma_free(struct snd_pcm_substream *substream
  */
 void snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream)
 {
-       snd_pcm_lib_preallocate_dma_free(substream);
+       do_free_pages(substream->pcm->card, &substream->dma_buffer);
 }
 
 /**
index 47b155a..9f3f8e9 100644 (file)
@@ -755,8 +755,13 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream,
                runtime->boundary *= 2;
 
        /* clear the buffer for avoiding possible kernel info leaks */
-       if (runtime->dma_area && !substream->ops->copy_user)
-               memset(runtime->dma_area, 0, runtime->dma_bytes);
+       if (runtime->dma_area && !substream->ops->copy_user) {
+               size_t size = runtime->dma_bytes;
+
+               if (runtime->info & SNDRV_PCM_INFO_MMAP)
+                       size = PAGE_ALIGN(size);
+               memset(runtime->dma_area, 0, size);
+       }
 
        snd_pcm_timer_resolution_change(substream);
        snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP);
index 41cc640..dde5ba2 100644 (file)
@@ -2516,6 +2516,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950),
+       SND_PCI_QUIRK(0x1462, 0x1229, "MSI-GP73", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x1275, "MSI-GL63", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950),
@@ -6368,6 +6369,7 @@ enum {
        ALC287_FIXUP_HP_GPIO_LED,
        ALC256_FIXUP_HP_HEADSET_MIC,
        ALC236_FIXUP_DELL_AIO_HEADSET_MIC,
+       ALC282_FIXUP_ACER_DISABLE_LINEOUT,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -7791,6 +7793,16 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
        },
+       [ALC282_FIXUP_ACER_DISABLE_LINEOUT] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1b, 0x411111f0 },
+                       { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+                       { },
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7805,11 +7817,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
        SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
        SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
+       SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK),
        SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK),
        SND_PCI_QUIRK(0x1025, 0x1099, "Acer Aspire E5-523G", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1025, 0x1166, "Acer Veriton N4640G", ALC269_FIXUP_LIFEBOOK),
+       SND_PCI_QUIRK(0x1025, 0x1167, "Acer Veriton N6640G", ALC269_FIXUP_LIFEBOOK),
        SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK),
        SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
        SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE),
@@ -7870,6 +7885,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
        SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1028, 0x0a58, "Dell Precision 3650 Tower", ALC255_FIXUP_DELL_HEADSET_MIC),
        SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -8017,6 +8033,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x152d, 0x1082, "Quanta NL3", ALC269_FIXUP_LIFEBOOK),
        SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x1401, "Clevo L140[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -8564,6 +8581,22 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x12, 0x90a60140},
                {0x19, 0x04a11030},
                {0x21, 0x04211020}),
+       SND_HDA_PIN_QUIRK(0x10ec0282, 0x1025, "Acer", ALC282_FIXUP_ACER_DISABLE_LINEOUT,
+               ALC282_STANDARD_PINS,
+               {0x12, 0x90a609c0},
+               {0x18, 0x03a11830},
+               {0x19, 0x04a19831},
+               {0x1a, 0x0481303f},
+               {0x1b, 0x04211020},
+               {0x21, 0x0321101f}),
+       SND_HDA_PIN_QUIRK(0x10ec0282, 0x1025, "Acer", ALC282_FIXUP_ACER_DISABLE_LINEOUT,
+               ALC282_STANDARD_PINS,
+               {0x12, 0x90a60940},
+               {0x18, 0x03a11830},
+               {0x19, 0x04a19831},
+               {0x1a, 0x0481303f},
+               {0x1b, 0x04211020},
+               {0x21, 0x0321101f}),
        SND_HDA_PIN_QUIRK(0x10ec0283, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
                ALC282_STANDARD_PINS,
                {0x12, 0x90a60130},
index cb0b658..d731ca6 100644 (file)
@@ -383,6 +383,9 @@ static const struct usb_audio_device_name usb_audio_names[] = {
        /* ASUS ROG Strix */
        PROFILE_NAME(0x0b05, 0x1917,
                     "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"),
+       /* ASUS PRIME TRX40 PRO-S */
+       PROFILE_NAME(0x0b05, 0x1918,
+                    "Realtek", "ALC1220-VB-DT", "Realtek-ALC1220-VB-Desktop"),
 
        /* Dell WD15 Dock */
        PROFILE_NAME(0x0bda, 0x4014, "Dell", "WD15 Dock", "Dell-WD15-Dock"),
index e940dce..31051f2 100644 (file)
@@ -534,6 +534,12 @@ static int set_sample_rate_v1(struct snd_usb_audio *chip,
        }
 
        crate = data[0] | (data[1] << 8) | (data[2] << 16);
+       if (!crate) {
+               dev_info(&dev->dev, "failed to read current rate; disabling the check\n");
+               chip->sample_rate_read_error = 3; /* three strikes, see above */
+               return 0;
+       }
+
        if (crate != rate) {
                dev_warn(&dev->dev, "current rate %d is different from the runtime rate %d\n", crate, rate);
                // runtime->rate = crate;
index 4e911d2..eb3a4c4 100644 (file)
@@ -75,6 +75,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = {
        /* No quirk for playback but with capture quirk (see below) */
        IMPLICIT_FB_SKIP_DEV(0x0582, 0x0130),   /* BOSS BR-80 */
        IMPLICIT_FB_SKIP_DEV(0x0582, 0x0189),   /* BOSS GT-100v2 */
+       IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d6),   /* BOSS GT-1 */
        IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d8),   /* BOSS Katana */
        IMPLICIT_FB_SKIP_DEV(0x0582, 0x01e5),   /* BOSS GT-001 */
 
@@ -85,6 +86,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = {
 static const struct snd_usb_implicit_fb_match capture_implicit_fb_quirks[] = {
        IMPLICIT_FB_FIXED_DEV(0x0582, 0x0130, 0x0d, 0x01), /* BOSS BR-80 */
        IMPLICIT_FB_FIXED_DEV(0x0582, 0x0189, 0x0d, 0x01), /* BOSS GT-100v2 */
+       IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d6, 0x0d, 0x01), /* BOSS GT-1 */
        IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d8, 0x0d, 0x01), /* BOSS Katana */
        IMPLICIT_FB_FIXED_DEV(0x0582, 0x01e5, 0x0d, 0x01), /* BOSS GT-001 */
 
index 63cdf3c..e4a690b 100644 (file)
@@ -1771,6 +1771,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case 0x25ce:  /* Mytek devices */
        case 0x278b:  /* Rotel? */
        case 0x292b:  /* Gustard/Ess based devices */
+       case 0x2972:  /* FiiO devices */
        case 0x2ab6:  /* T+A devices */
        case 0x3353:  /* Khadas devices */
        case 0x3842:  /* EVGA */
index dad350d..f5ef2d5 100644 (file)
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
 #define X86_FEATURE_TSC_ADJUST         ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
+#define X86_FEATURE_SGX                        ( 9*32+ 2) /* Software Guard Extensions */
 #define X86_FEATURE_BMI1               ( 9*32+ 3) /* 1st group bit manipulation extensions */
 #define X86_FEATURE_HLE                        ( 9*32+ 4) /* Hardware Lock Elision */
 #define X86_FEATURE_AVX2               ( 9*32+ 5) /* AVX2 instructions */
 #define X86_FEATURE_MOVDIRI            (16*32+27) /* MOVDIRI instruction */
 #define X86_FEATURE_MOVDIR64B          (16*32+28) /* MOVDIR64B instruction */
 #define X86_FEATURE_ENQCMD             (16*32+29) /* ENQCMD and ENQCMDS instructions */
+#define X86_FEATURE_SGX_LC             (16*32+30) /* Software Guard Extensions Launch Control */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV     (17*32+ 0) /* MCA overflow recovery support */
index 5861d34..7947cb1 100644 (file)
 # define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
 #endif
 
+#ifdef CONFIG_X86_SGX
+# define DISABLE_SGX   0
+#else
+# define DISABLE_SGX   (1 << (X86_FEATURE_SGX & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -74,7 +80,7 @@
 #define DISABLED_MASK6 0
 #define DISABLED_MASK7 (DISABLE_PTI)
 #define DISABLED_MASK8 0
-#define DISABLED_MASK9 (DISABLE_SMAP)
+#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX)
 #define DISABLED_MASK10        0
 #define DISABLED_MASK11        0
 #define DISABLED_MASK12        0
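For context, a minimal sketch (assuming the standard disabled-features pattern; this is not code from the merge) of why the SGX bit is routed into DISABLED_MASK9:

	/*
	 * With CONFIG_X86_SGX=n, DISABLE_SGX sets the SGX bit in
	 * DISABLED_MASK9, so cpu_feature_enabled() constant-folds this
	 * test to false and the branch is discarded at compile time.
	 * The callee is a hypothetical name for the example.
	 */
	if (cpu_feature_enabled(X86_FEATURE_SGX))
		sgx_feature_setup();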
index 972a34d..2b5fc9a 100644 (file)
 #define MSR_IA32_MCG_CAP               0x00000179
 #define MSR_IA32_MCG_STATUS            0x0000017a
 #define MSR_IA32_MCG_CTL               0x0000017b
+#define MSR_ERROR_CONTROL              0x0000017f
 #define MSR_IA32_MCG_EXT_CTL           0x000004d0
 
 #define MSR_OFFCORE_RSP_0              0x000001a6
 #define MSR_PP1_ENERGY_STATUS          0x00000641
 #define MSR_PP1_POLICY                 0x00000642
 
-#define MSR_AMD_PKG_ENERGY_STATUS      0xc001029b
 #define MSR_AMD_RAPL_POWER_UNIT                0xc0010299
+#define MSR_AMD_CORE_ENERGY_STATUS             0xc001029a
+#define MSR_AMD_PKG_ENERGY_STATUS      0xc001029b
 
 /* Config TDP MSRs */
 #define MSR_CONFIG_TDP_NOMINAL         0x00000648
 #define FEAT_CTL_LOCKED                                BIT(0)
 #define FEAT_CTL_VMX_ENABLED_INSIDE_SMX                BIT(1)
 #define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX       BIT(2)
+#define FEAT_CTL_SGX_LC_ENABLED                        BIT(17)
+#define FEAT_CTL_SGX_ENABLED                   BIT(18)
 #define FEAT_CTL_LMCE_ENABLED                  BIT(20)
 
 #define MSR_IA32_TSC_ADJUST             0x0000003b
 #define MSR_IA32_UCODE_WRITE           0x00000079
 #define MSR_IA32_UCODE_REV             0x0000008b
 
+/* Intel SGX Launch Enclave Public Key Hash MSRs */
+#define MSR_IA32_SGXLEPUBKEYHASH0      0x0000008C
+#define MSR_IA32_SGXLEPUBKEYHASH1      0x0000008D
+#define MSR_IA32_SGXLEPUBKEYHASH2      0x0000008E
+#define MSR_IA32_SGXLEPUBKEYHASH3      0x0000008F
+
 #define MSR_IA32_SMM_MONITOR_CTL       0x0000009b
 #define MSR_IA32_SMBASE                        0x0000009e
 
index cdde783..89ba522 100644 (file)
@@ -90,7 +90,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
 ###############################
 
 $(OUTPUT)test-all.bin:
-       $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd
+       $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap
 
 $(OUTPUT)test-hello.bin:
        $(BUILD)
index 90c3155..cacd66a 100644 (file)
@@ -148,6 +148,7 @@ void print_usage(void)
                "  -s         Set line as open source\n"
                "  -r         Listen for rising edges\n"
                "  -f         Listen for falling edges\n"
+               "  -w         Report the wall-clock time for events\n"
                "  -b <n>     Debounce the line with period n microseconds\n"
                " [-c <n>]    Do <n> loops (optional, infinite loop if not stated)\n"
                "  -?         This helptext\n"
@@ -173,7 +174,7 @@ int main(int argc, char **argv)
 
        memset(&config, 0, sizeof(config));
        config.flags = GPIO_V2_LINE_FLAG_INPUT;
-       while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) {
+       while ((c = getopt(argc, argv, "c:n:o:b:dsrfw?")) != -1) {
                switch (c) {
                case 'c':
                        loops = strtoul(optarg, NULL, 10);
@@ -204,6 +205,9 @@ int main(int argc, char **argv)
                case 'f':
                        config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING;
                        break;
+               case 'w':
+                       config.flags |= GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME;
+                       break;
                case '?':
                        print_usage();
                        return -1;
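In practice (an illustrative invocation; the chip and line numbers are placeholders), gpio-event-mon -n gpiochip0 -o 3 -r -w would monitor rising edges on line 3 and, thanks to the new -w flag, timestamp events with the wall clock instead of the default monotonic clock.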
index 5a05a45..c61d061 100644 (file)
@@ -65,6 +65,10 @@ struct gpio_flag flagnames[] = {
                .name = "bias-disabled",
                .mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED,
        },
+       {
+               .name = "clock-realtime",
+               .mask = GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME,
+       },
 };
 
 static void print_attributes(struct gpio_v2_line_info *info)
index cc7070c..ce365d2 100644 (file)
@@ -79,4 +79,9 @@
 #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
 #endif // static_assert
 
+#ifdef __GENKSYMS__
+/* genksyms gets confused by _Static_assert */
+#define _Static_assert(expr, ...)
+#endif
+
 #endif /* _LINUX_BUILD_BUG_H */
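To see what the override buys, a brief sketch (assuming the usual flow, where __GENKSYMS__ is defined only while genksyms computes symbol CRCs):

	/*
	 * Under genksyms the macro above expands this to an empty
	 * statement, so the parser is never confused; a real compiler
	 * never defines __GENKSYMS__ and still checks the assertion.
	 */
	_Static_assert(sizeof(long) >= 4, "long must be at least 32 bits");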
index d22a974..ff872dc 100644 (file)
@@ -2,9 +2,7 @@
 #ifndef _TOOLS_LINUX_COMPILER_H_
 #define _TOOLS_LINUX_COMPILER_H_
 
-#ifdef __GNUC__
-#include <linux/compiler-gcc.h>
-#endif
+#include <linux/compiler_types.h>
 
 #ifndef __compiletime_error
 # define __compiletime_error(message)
diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h
new file mode 100644 (file)
index 0000000..feea090
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_COMPILER_TYPES_H
+#define __LINUX_COMPILER_TYPES_H
+
+/* Builtins */
+
+/*
+ * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21.
+ * In the meantime, to support gcc < 10, we implement __has_builtin
+ * by hand.
+ */
+#ifndef __has_builtin
+#define __has_builtin(x) (0)
+#endif
+
+/* Compiler specific macros. */
+#ifdef __GNUC__
+#include <linux/compiler-gcc.h>
+#endif
+
+#endif /* __LINUX_COMPILER_TYPES_H */
index 310090b..29ed3fe 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef _LINUX_CTYPE_H
 #define _LINUX_CTYPE_H
 
+#include <linux/compiler.h>
+
 /*
  * NOTE! This ctype does not handle EOF like the standard C
  * library is required to.
@@ -23,11 +25,6 @@ extern const unsigned char _ctype[];
 #define isalnum(c)     ((__ismask(c)&(_U|_L|_D)) != 0)
 #define isalpha(c)     ((__ismask(c)&(_U|_L)) != 0)
 #define iscntrl(c)     ((__ismask(c)&(_C)) != 0)
-static inline int __isdigit(int c)
-{
-       return '0' <= c && c <= '9';
-}
-#define isdigit(c)     __isdigit(c)
 #define isgraph(c)     ((__ismask(c)&(_P|_U|_L|_D)) != 0)
 #define islower(c)     ((__ismask(c)&(_L)) != 0)
 #define isprint(c)     ((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0)
@@ -40,6 +37,16 @@ static inline int __isdigit(int c)
 #define isascii(c) (((unsigned char)(c))<=0x7f)
 #define toascii(c) (((unsigned char)(c))&0x7f)
 
+#if __has_builtin(__builtin_isdigit)
+#define  isdigit(c) __builtin_isdigit(c)
+#else
+static inline int __isdigit(int c)
+{
+       return '0' <= c && c <= '9';
+}
+#define  isdigit(c) __isdigit(c)
+#endif
+
 static inline unsigned char __tolower(unsigned char c)
 {
        if (isupper(c))
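The isdigit() change above is the first user of the guard; the same pattern works for any builtin. A minimal sketch (illustrative only, with invented names):

	#if __has_builtin(__builtin_popcount)
	#define hweight32_example(x) __builtin_popcount(x)
	#else
	/* Portable fallback for compilers without the builtin. */
	static inline int hweight32_example(unsigned int x)
	{
		int n = 0;

		while (x) {
			n += x & 1;
			x >>= 1;
		}
		return n;
	}
	#endif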
index 5e9e781..db5c993 100644 (file)
@@ -46,4 +46,5 @@ extern char * __must_check skip_spaces(const char *);
 
 extern char *strim(char *);
 
+extern void *memchr_inv(const void *start, int c, size_t bytes);
 #endif /* _TOOLS_LINUX_STRING_H_ */
index 2056318..fc48c64 100644 (file)
@@ -517,7 +517,7 @@ __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
 __SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex)
 #endif
 
-/* kernel/timer.c */
+/* kernel/sys.c */
 #define __NR_getpid 172
 __SYSCALL(__NR_getpid, sys_getpid)
 #define __NR_getppid 173
index 5ed721a..af2a44c 100644 (file)
@@ -28,4 +28,9 @@
 #define _BITUL(x)      (_UL(1) << (x))
 #define _BITULL(x)     (_ULL(1) << (x))
 
+#define __ALIGN_KERNEL(x, a)           __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL_MASK(x, mask)   (((x) + (mask)) & ~(mask))
+
+#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
 #endif /* _UAPI_LINUX_CONST_H */
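The new helpers are easy to sanity-check by hand (values chosen for illustration):

	/* (13 + 7) & ~7 = 16;  (13 + 7) / 8 = 2 */
	_Static_assert(__ALIGN_KERNEL(13, 8) == 16, "round 13 up to an 8-byte boundary");
	_Static_assert(__KERNEL_DIV_ROUND_UP(13, 8) == 2, "ceil(13 / 8)");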
index e5de603..9f4428b 100644 (file)
@@ -20,7 +20,6 @@
 #define FSCRYPT_POLICY_FLAG_DIRECT_KEY         0x04
 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64     0x08
 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32     0x10
-#define FSCRYPT_POLICY_FLAGS_VALID             0x1F
 
 /* Encryption algorithms */
 #define FSCRYPT_MODE_AES_256_XTS               1
@@ -28,7 +27,7 @@
 #define FSCRYPT_MODE_AES_128_CBC               5
 #define FSCRYPT_MODE_AES_128_CTS               6
 #define FSCRYPT_MODE_ADIANTUM                  9
-#define __FSCRYPT_MODE_MAX                     9
+/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */
 
 /*
  * Legacy policy version; ad-hoc KDF and no key verification.
@@ -177,7 +176,7 @@ struct fscrypt_get_key_status_arg {
 #define FS_POLICY_FLAGS_PAD_32         FSCRYPT_POLICY_FLAGS_PAD_32
 #define FS_POLICY_FLAGS_PAD_MASK       FSCRYPT_POLICY_FLAGS_PAD_MASK
 #define FS_POLICY_FLAG_DIRECT_KEY      FSCRYPT_POLICY_FLAG_DIRECT_KEY
-#define FS_POLICY_FLAGS_VALID          FSCRYPT_POLICY_FLAGS_VALID
+#define FS_POLICY_FLAGS_VALID          0x07    /* contains old flags only */
 #define FS_ENCRYPTION_MODE_INVALID     0       /* never used */
 #define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS
 #define FS_ENCRYPTION_MODE_AES_256_GCM 2       /* never used */
index b95d3c4..b15e344 100644 (file)
@@ -143,8 +143,10 @@ enum perf_event_sample_format {
        PERF_SAMPLE_PHYS_ADDR                   = 1U << 19,
        PERF_SAMPLE_AUX                         = 1U << 20,
        PERF_SAMPLE_CGROUP                      = 1U << 21,
+       PERF_SAMPLE_DATA_PAGE_SIZE              = 1U << 22,
+       PERF_SAMPLE_CODE_PAGE_SIZE              = 1U << 23,
 
-       PERF_SAMPLE_MAX = 1U << 22,             /* non-ABI */
+       PERF_SAMPLE_MAX = 1U << 24,             /* non-ABI */
 
        __PERF_SAMPLE_CALLCHAIN_EARLY           = 1ULL << 63, /* non-ABI; internal use */
 };
@@ -896,6 +898,8 @@ enum perf_event_type {
         *      { u64                   phys_addr;} && PERF_SAMPLE_PHYS_ADDR
         *      { u64                   size;
         *        char                  data[size]; } && PERF_SAMPLE_AUX
+        *      { u64                   data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
+        *      { u64                   code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE
         * };
         */
        PERF_RECORD_SAMPLE                      = 9,
index 7f08277..90deb41 100644 (file)
@@ -247,4 +247,9 @@ struct prctl_mm_map {
 #define PR_SET_IO_FLUSHER              57
 #define PR_GET_IO_FLUSHER              58
 
+/* Dispatch syscalls to a userspace handler */
+#define PR_SET_SYSCALL_USER_DISPATCH   59
+# define PR_SYS_DISPATCH_OFF           0
+# define PR_SYS_DISPATCH_ON            1
+
 #endif /* _LINUX_PRCTL_H */
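A rough sketch of how the new prctl is driven, per the syscall-user-dispatch series it comes from (the selector semantics and the address range are assumptions for illustration, not taken from this hunk):

	#include <sys/prctl.h>

	/*
	 * While the selector byte is set to "block" (assumed: 1, with 0
	 * meaning "allow"), syscalls issued from outside [start, start + len)
	 * are redirected to the task's SIGSYS handler.
	 */
	static char sud_selector;

	static int enable_sud(unsigned long start, unsigned long len)
	{
		return prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
			     start, len, &sud_selector);
	}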
index 82cc58f..1500a0f 100644 (file)
@@ -171,9 +171,12 @@ struct statx {
  * be of use to ordinary userspace programs such as GUIs or ls rather than
  * specialised tools.
  *
- * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
+ * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags
  * semantically.  Where possible, the numerical value is picked to correspond
- * also.
+ * also.  Note that the DAX attribute indicates that the file is in the CPU
+ * direct access state.  It does not correspond to the per-inode flag that
+ * some filesystems support.
+ *
  */
 #define STATX_ATTR_COMPRESSED          0x00000004 /* [I] File is compressed by the fs */
 #define STATX_ATTR_IMMUTABLE           0x00000010 /* [I] File is marked immutable */
@@ -183,7 +186,7 @@ struct statx {
 #define STATX_ATTR_AUTOMOUNT           0x00001000 /* Dir: Automount trigger */
 #define STATX_ATTR_MOUNT_ROOT          0x00002000 /* Root of a mount */
 #define STATX_ATTR_VERITY              0x00100000 /* [I] Verity protected file */
-#define STATX_ATTR_DAX                 0x00002000 /* [I] File is DAX */
+#define STATX_ATTR_DAX                 0x00200000 /* File is currently in DAX state */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
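With the corrected bit value, userspace can probe the state roughly like this (a sketch assuming glibc's statx() wrapper; the path is a placeholder):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/stat.h>

	int main(void)
	{
		struct statx stx;

		if (statx(AT_FDCWD, "/mnt/pmem/file", 0, STATX_BASIC_STATS, &stx))
			return 1;
		if ((stx.stx_attributes_mask & STATX_ATTR_DAX) &&
		    (stx.stx_attributes & STATX_ATTR_DAX))
			puts("file is currently in the DAX state");
		return 0;
	}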
index f645343..8b6892f 100644 (file)
@@ -168,3 +168,61 @@ char *strreplace(char *s, char old, char new)
                        *s = new;
        return s;
 }
+
+static void *check_bytes8(const u8 *start, u8 value, unsigned int bytes)
+{
+       while (bytes) {
+               if (*start != value)
+                       return (void *)start;
+               start++;
+               bytes--;
+       }
+       return NULL;
+}
+
+/**
+ * memchr_inv - Find an unmatching character in an area of memory.
+ * @start: The memory area
+ * @c: Find a character other than c
+ * @bytes: The size of the area.
+ *
+ * returns the address of the first character other than @c, or %NULL
+ * if the whole buffer contains just @c.
+ */
+void *memchr_inv(const void *start, int c, size_t bytes)
+{
+       u8 value = c;
+       u64 value64;
+       unsigned int words, prefix;
+
+       if (bytes <= 16)
+               return check_bytes8(start, value, bytes);
+
+       value64 = value;
+       value64 |= value64 << 8;
+       value64 |= value64 << 16;
+       value64 |= value64 << 32;
+
+       prefix = (unsigned long)start % 8;
+       if (prefix) {
+               u8 *r;
+
+               prefix = 8 - prefix;
+               r = check_bytes8(start, value, prefix);
+               if (r)
+                       return r;
+               start += prefix;
+               bytes -= prefix;
+       }
+
+       words = bytes / 8;
+
+       while (words) {
+               if (*(u64 *)start != value64)
+                       return check_bytes8(start, value, 8);
+               start += 8;
+               words--;
+       }
+
+       return check_bytes8(start, value, bytes % 8);
+}
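Per the kernel-doc above, the typical use is asserting that a region holds one repeated byte; for instance (an illustrative helper, not in the patch):

	/* True iff the whole buffer is zero-filled: memchr_inv() returns
	 * NULL only when every byte equals 'c'. */
	static bool buffer_is_zeroed(const void *buf, size_t len)
	{
		return memchr_inv(buf, 0, len) == NULL;
	}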
index d3740c8..079cdfa 100644 (file)
@@ -11,6 +11,7 @@
                d       create a debug log
                f       synthesize first level cache events
                m       synthesize last level cache events
+               M       synthesize memory events
                t       synthesize TLB events
                a       synthesize remote access events
                g       synthesize a call chain (use with i or x)
index 31069d8..5c379ad 100644 (file)
@@ -138,7 +138,7 @@ If you want to add or modify several config items, you can do like
 
 To modify the sort order of report functionality in user config file(i.e. `~/.perfconfig`), do
 
-       % perf config --user report sort-order=srcline
+       % perf config --user report.sort-order=srcline
 
 To change colors of selected line to other foreground and background colors
 in system config file (i.e. `$(sysconf)/perfconfig`), do
index 768888b..34cf651 100644 (file)
@@ -293,6 +293,9 @@ OPTIONS
 --phys-data::
        Record the sample physical addresses.
 
+--data-page-size::
+       Record the sampled data address data page size.
+
 -T::
 --timestamp::
        Record the sample timestamps. Use it with 'perf report -D' to see the
@@ -634,11 +637,17 @@ endif::HAVE_LIBPFM[]
 --control=fifo:ctl-fifo[,ack-fifo]::
 --control=fd:ctl-fd[,ack-fd]::
 ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows.
-Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
-'disable': disable events, 'snapshot': AUX area tracing snapshot). Measurements can be
-started with events disabled using --delay=-1 option. Optionally send control command
-completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process.
-Example of bash shell script to enable and disable events during measurements:
+Listen on the ctl-fd descriptor for commands to control the measurement.
+
+Available commands:
+  'enable'  : enable events
+  'disable' : disable events
+  'snapshot': AUX area tracing snapshot
+
+Measurements can be started with events disabled using the --delay=-1 option. Optionally,
+send control command completion ('ack\n') to the ack-fd descriptor to synchronize with
+the controlling process.  An example bash script that enables and disables events during
+measurements:
 
  #!/bin/bash
 
index d068103..8f7f4e9 100644 (file)
@@ -150,6 +150,7 @@ OPTIONS
        - snoop: type of snoop (if any) for the data at the time of the sample
        - dcacheline: the cacheline the data address is on at the time of the sample
        - phys_daddr: physical address of data being executed on at the time of sample
+       - data_page_size: the data page size of data being executed on at the time of sample
 
        And the default sort keys are changed to local_weight, mem, sym, dso,
        symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
index 4f712fb..44d3721 100644 (file)
@@ -116,8 +116,9 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-        srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
-        brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc.
+        srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
+        brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr,
+        metric, misc, srccode, ipc, data_page_size.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
index 9f9f290..5d4a673 100644 (file)
@@ -168,8 +168,9 @@ command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'.
 
 --for-each-cgroup name::
 Expand event list for each cgroup in "name" (allow multiple cgroups separated
-by comma).  This has same effect that repeating -e option and -G option for
-each event x name.  This option cannot be used with -G/--cgroup option.
+by comma).  It also supports regex patterns to match multiple groups.  This has the
+same effect as repeating the -e and -G options for each event x name.  This option
+cannot be used with the -G/--cgroup option.
 
 -o file::
 --output file::
@@ -316,6 +317,10 @@ small group that need not have multiplexing is lowered. This option
 forbids the event merging logic from sharing events between groups and
 may be used to increase accuracy in this case.
 
+--quiet::
+Don't print output. This is useful with 'perf stat record' below to only
+write data to the perf.data file.
+
 STAT RECORD
 -----------
 Stores stat data into perf data file.
index cad7bf7..bd446ab 100644 (file)
@@ -395,7 +395,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
         * To obtain the auxtrace buffer file descriptor, the auxtrace
         * event must come first.
         */
-       perf_evlist__to_front(evlist, cs_etm_evsel);
+       evlist__to_front(evlist, cs_etm_evsel);
 
        /*
         * In the case of per-cpu mmaps, we need the CPU on the
@@ -420,7 +420,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
                        goto out;
 
                tracking_evsel = evlist__last(evlist);
-               perf_evlist__set_tracking_event(evlist, tracking_evsel);
+               evlist__set_tracking_event(evlist, tracking_evsel);
 
                tracking_evsel->core.attr.freq = 0;
                tracking_evsel->core.attr.sample_period = 1;
index dbef716..fab3095 100644 (file)
@@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 PERF_HAVE_JITDUMP := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+HAVE_KVM_STAT_SUPPORT := 1
 
 #
 # Syscall table generation for perf
index b53294d..ead2f22 100644 (file)
@@ -2,6 +2,7 @@ perf-y += header.o
 perf-y += machine.o
 perf-y += perf_regs.o
 perf-y += tsc.o
+perf-y += kvm-stat.o
 perf-$(CONFIG_DWARF)     += dwarf-regs.o
 perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
@@ -9,4 +10,4 @@ perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
                              ../../arm/util/auxtrace.o \
                              ../../arm/util/cs-etm.o \
-                             arm-spe.o
+                             arm-spe.o mem-events.o
index e359306..414c8a5 100644 (file)
@@ -118,7 +118,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
         * To obtain the auxtrace buffer file descriptor, the auxtrace event
         * must come first.
         */
-       perf_evlist__to_front(evlist, arm_spe_evsel);
+       evlist__to_front(evlist, arm_spe_evsel);
 
        evsel__set_sample_bit(arm_spe_evsel, CPU);
        evsel__set_sample_bit(arm_spe_evsel, TIME);
@@ -130,7 +130,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
                return err;
 
        tracking_evsel = evlist__last(evlist);
-       perf_evlist__set_tracking_event(evlist, tracking_evsel);
+       evlist__set_tracking_event(evlist, tracking_evsel);
 
        tracking_evsel->core.attr.freq = 0;
        tracking_evsel->core.attr.sample_period = 1;
diff --git a/tools/perf/arch/arm64/util/arm64_exception_types.h b/tools/perf/arch/arm64/util/arm64_exception_types.h
new file mode 100644 (file)
index 0000000..27c981e
--- /dev/null
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef ARCH_PERF_ARM64_EXCEPTION_TYPES_H
+#define ARCH_PERF_ARM64_EXCEPTION_TYPES_H
+
+/* Per asm/virt.h */
+#define HVC_STUB_ERR             0xbadca11
+
+/* Per asm/kvm_asm.h */
+#define ARM_EXCEPTION_IRQ              0
+#define ARM_EXCEPTION_EL1_SERROR       1
+#define ARM_EXCEPTION_TRAP             2
+#define ARM_EXCEPTION_IL               3
+/* The hyp-stub will return this for any kvm_call_hyp() call */
+#define ARM_EXCEPTION_HYP_GONE         HVC_STUB_ERR
+
+#define kvm_arm_exception_type                                 \
+       {ARM_EXCEPTION_IRQ,             "IRQ"           },      \
+       {ARM_EXCEPTION_EL1_SERROR,      "SERROR"        },      \
+       {ARM_EXCEPTION_TRAP,            "TRAP"          },      \
+       {ARM_EXCEPTION_IL,              "ILLEGAL"       },      \
+       {ARM_EXCEPTION_HYP_GONE,        "HYP_GONE"      }
+
+/* Per asm/esr.h */
+#define ESR_ELx_EC_UNKNOWN     (0x00)
+#define ESR_ELx_EC_WFx         (0x01)
+/* Unallocated EC: 0x02 */
+#define ESR_ELx_EC_CP15_32     (0x03)
+#define ESR_ELx_EC_CP15_64     (0x04)
+#define ESR_ELx_EC_CP14_MR     (0x05)
+#define ESR_ELx_EC_CP14_LS     (0x06)
+#define ESR_ELx_EC_FP_ASIMD    (0x07)
+#define ESR_ELx_EC_CP10_ID     (0x08)  /* EL2 only */
+#define ESR_ELx_EC_PAC         (0x09)  /* EL2 and above */
+/* Unallocated EC: 0x0A - 0x0B */
+#define ESR_ELx_EC_CP14_64     (0x0C)
+/* Unallocated EC: 0x0d */
+#define ESR_ELx_EC_ILL         (0x0E)
+/* Unallocated EC: 0x0F - 0x10 */
+#define ESR_ELx_EC_SVC32       (0x11)
+#define ESR_ELx_EC_HVC32       (0x12)  /* EL2 only */
+#define ESR_ELx_EC_SMC32       (0x13)  /* EL2 and above */
+/* Unallocated EC: 0x14 */
+#define ESR_ELx_EC_SVC64       (0x15)
+#define ESR_ELx_EC_HVC64       (0x16)  /* EL2 and above */
+#define ESR_ELx_EC_SMC64       (0x17)  /* EL2 and above */
+#define ESR_ELx_EC_SYS64       (0x18)
+#define ESR_ELx_EC_SVE         (0x19)
+#define ESR_ELx_EC_ERET                (0x1a)  /* EL2 only */
+/* Unallocated EC: 0x1b - 0x1E */
+#define ESR_ELx_EC_IMP_DEF     (0x1f)  /* EL3 only */
+#define ESR_ELx_EC_IABT_LOW    (0x20)
+#define ESR_ELx_EC_IABT_CUR    (0x21)
+#define ESR_ELx_EC_PC_ALIGN    (0x22)
+/* Unallocated EC: 0x23 */
+#define ESR_ELx_EC_DABT_LOW    (0x24)
+#define ESR_ELx_EC_DABT_CUR    (0x25)
+#define ESR_ELx_EC_SP_ALIGN    (0x26)
+/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_FP_EXC32    (0x28)
+/* Unallocated EC: 0x29 - 0x2B */
+#define ESR_ELx_EC_FP_EXC64    (0x2C)
+/* Unallocated EC: 0x2D - 0x2E */
+#define ESR_ELx_EC_SERROR      (0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW (0x30)
+#define ESR_ELx_EC_BREAKPT_CUR (0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
+#define ESR_ELx_EC_WATCHPT_LOW (0x34)
+#define ESR_ELx_EC_WATCHPT_CUR (0x35)
+/* Unallocated EC: 0x36 - 0x37 */
+#define ESR_ELx_EC_BKPT32      (0x38)
+/* Unallocated EC: 0x39 */
+#define ESR_ELx_EC_VECTOR32    (0x3A)  /* EL2 only */
+/* Unallocated EC: 0x3B */
+#define ESR_ELx_EC_BRK64       (0x3C)
+/* Unallocated EC: 0x3D - 0x3F */
+#define ESR_ELx_EC_MAX         (0x3F)
+
+#define ECN(x) { ESR_ELx_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+       ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
+       ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \
+       ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \
+       ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \
+       ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
+       ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
+       ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
+       ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
+       ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+
+#endif /* ARCH_PERF_ARM64_EXCEPTION_TYPES_H */
diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c
new file mode 100644 (file)
index 0000000..50376b9
--- /dev/null
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <memory.h>
+#include "../../util/evsel.h"
+#include "../../util/kvm-stat.h"
+#include "arm64_exception_types.h"
+#include "debug.h"
+
+define_exit_reasons_table(arm64_exit_reasons, kvm_arm_exception_type);
+define_exit_reasons_table(arm64_trap_exit_reasons, kvm_arm_exception_class);
+
+const char *kvm_trap_exit_reason = "esr_ec";
+const char *vcpu_id_str = "id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "ret";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
+const char *kvm_events_tp[] = {
+       "kvm:kvm_entry",
+       "kvm:kvm_exit",
+       NULL,
+};
+
+static void event_get_key(struct evsel *evsel,
+                         struct perf_sample *sample,
+                         struct event_key *key)
+{
+       key->info = 0;
+       key->key = evsel__intval(evsel, sample, kvm_exit_reason);
+       key->exit_reasons = arm64_exit_reasons;
+
+       /*
+        * TRAP exceptions carry exception class info in esr_ec field
+        * and, hence, we need to use a different exit_reasons table to
+        * properly decode the event's esr_ec.
+        */
+       if (key->key == ARM_EXCEPTION_TRAP) {
+               key->key = evsel__intval(evsel, sample, kvm_trap_exit_reason);
+               key->exit_reasons = arm64_trap_exit_reasons;
+       }
+}
+
+static bool event_begin(struct evsel *evsel,
+                       struct perf_sample *sample __maybe_unused,
+                       struct event_key *key __maybe_unused)
+{
+       return !strcmp(evsel->name, kvm_entry_trace);
+}
+
+static bool event_end(struct evsel *evsel,
+                     struct perf_sample *sample,
+                     struct event_key *key)
+{
+       if (!strcmp(evsel->name, kvm_exit_trace)) {
+               event_get_key(evsel, sample, key);
+               return true;
+       }
+       return false;
+}
+
+static struct kvm_events_ops exit_events = {
+       .is_begin_event = event_begin,
+       .is_end_event   = event_end,
+       .decode_key     = exit_event_decode_key,
+       .name           = "VM-EXIT"
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+       {
+               .name   = "vmexit",
+               .ops    = &exit_events,
+       },
+       { NULL },
+};
+
+const char * const kvm_skip_events[] = {
+       NULL,
+};
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+       kvm->exit_reasons_isa = "arm64";
+       return 0;
+}
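
The decoding here is two-level: kvm_exit's "ret" field names the exception type, and only for ARM_EXCEPTION_TRAP does the esr_ec field select an entry from the second table. A minimal sketch of that fallthrough follows; the constant's value and the table contents are illustrative (the real tables are generated by define_exit_reasons_table()).

    #include <stdio.h>

    #define ARM_EXCEPTION_TRAP 2    /* illustrative value, not from the patch */

    struct reason { int key; const char *name; };

    static const struct reason exit_reasons[] = {
            { 0, "IRQ" }, { ARM_EXCEPTION_TRAP, "TRAP" }, { 0, NULL },
    };
    static const struct reason trap_reasons[] = {
            { 0x15, "SVC64" }, { 0x24, "DABT_LOW" }, { 0, NULL },
    };

    static const char *lookup(const struct reason *t, int key)
    {
            for (; t->name; t++)
                    if (t->key == key)
                            return t->name;
            return "unknown";
    }

    /* Mirrors event_get_key(): on a TRAP exit, re-key on the ESR EC value. */
    static const char *decode_exit(int ret, int esr_ec)
    {
            if (ret == ARM_EXCEPTION_TRAP)
                    return lookup(trap_reasons, esr_ec);
            return lookup(exit_reasons, ret);
    }

    int main(void)
    {
            printf("%s\n", decode_exit(ARM_EXCEPTION_TRAP, 0x24)); /* DABT_LOW */
            return 0;
    }
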
diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c
new file mode 100644 (file)
index 0000000..2a24973
--- /dev/null
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "map_symbol.h"
+#include "mem-events.h"
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+       E("spe-load",   "arm_spe_0/ts_enable=1,load_filter=1,store_filter=0,min_latency=%u/",   "arm_spe_0"),
+       E("spe-store",  "arm_spe_0/ts_enable=1,load_filter=0,store_filter=1/",                  "arm_spe_0"),
+       E("spe-ldst",   "arm_spe_0/ts_enable=1,load_filter=1,store_filter=1,min_latency=%u/",   "arm_spe_0"),
+};
+
+static char mem_ev_name[100];
+
+struct perf_mem_event *perf_mem_events__ptr(int i)
+{
+       if (i >= PERF_MEM_EVENTS__MAX)
+               return NULL;
+
+       return &perf_mem_events[i];
+}
+
+char *perf_mem_events__name(int i)
+{
+       struct perf_mem_event *e = perf_mem_events__ptr(i);
+
+       if (i >= PERF_MEM_EVENTS__MAX)
+               return NULL;
+
+       if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE)
+               scnprintf(mem_ev_name, sizeof(mem_ev_name),
+                         e->name, perf_mem_events__loads_ldlat);
+       else /* PERF_MEM_EVENTS__STORE */
+               scnprintf(mem_ev_name, sizeof(mem_ev_name), e->name);
+
+       return mem_ev_name;
+}
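
The event strings above are printf-style templates: the load and load+store variants take the configurable min_latency threshold, while the store variant does not. A standalone sketch of how perf_mem_events__name() instantiates them; the threshold value here is a stand-in for perf_mem_events__loads_ldlat.

    #include <stdio.h>

    /* Templates in the spirit of perf_mem_events[]; loads carry a
     * min_latency parameter, stores do not. */
    static const char *load_tmpl =
            "arm_spe_0/ts_enable=1,load_filter=1,store_filter=0,min_latency=%u/";
    static const char *store_tmpl =
            "arm_spe_0/ts_enable=1,load_filter=0,store_filter=1/";

    static unsigned int ldlat = 30; /* stand-in threshold */
    static char name_buf[100];

    int main(void)
    {
            snprintf(name_buf, sizeof(name_buf), load_tmpl, ldlat);
            printf("%s\n", name_buf);

            snprintf(name_buf, sizeof(name_buf), "%s", store_tmpl);
            printf("%s\n", name_buf);
            return 0;
    }
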
index 2833e10..54efa12 100644 (file)
@@ -2,5 +2,38 @@
 #include "../../../util/perf_regs.h"
 
 const struct sample_reg sample_reg_masks[] = {
+       SMPL_REG(x0, PERF_REG_ARM64_X0),
+       SMPL_REG(x1, PERF_REG_ARM64_X1),
+       SMPL_REG(x2, PERF_REG_ARM64_X2),
+       SMPL_REG(x3, PERF_REG_ARM64_X3),
+       SMPL_REG(x4, PERF_REG_ARM64_X4),
+       SMPL_REG(x5, PERF_REG_ARM64_X5),
+       SMPL_REG(x6, PERF_REG_ARM64_X6),
+       SMPL_REG(x7, PERF_REG_ARM64_X7),
+       SMPL_REG(x8, PERF_REG_ARM64_X8),
+       SMPL_REG(x9, PERF_REG_ARM64_X9),
+       SMPL_REG(x10, PERF_REG_ARM64_X10),
+       SMPL_REG(x11, PERF_REG_ARM64_X11),
+       SMPL_REG(x12, PERF_REG_ARM64_X12),
+       SMPL_REG(x13, PERF_REG_ARM64_X13),
+       SMPL_REG(x14, PERF_REG_ARM64_X14),
+       SMPL_REG(x15, PERF_REG_ARM64_X15),
+       SMPL_REG(x16, PERF_REG_ARM64_X16),
+       SMPL_REG(x17, PERF_REG_ARM64_X17),
+       SMPL_REG(x18, PERF_REG_ARM64_X18),
+       SMPL_REG(x19, PERF_REG_ARM64_X19),
+       SMPL_REG(x20, PERF_REG_ARM64_X20),
+       SMPL_REG(x21, PERF_REG_ARM64_X21),
+       SMPL_REG(x22, PERF_REG_ARM64_X22),
+       SMPL_REG(x23, PERF_REG_ARM64_X23),
+       SMPL_REG(x24, PERF_REG_ARM64_X24),
+       SMPL_REG(x25, PERF_REG_ARM64_X25),
+       SMPL_REG(x26, PERF_REG_ARM64_X26),
+       SMPL_REG(x27, PERF_REG_ARM64_X27),
+       SMPL_REG(x28, PERF_REG_ARM64_X28),
+       SMPL_REG(x29, PERF_REG_ARM64_X29),
+       SMPL_REG(lr, PERF_REG_ARM64_LR),
+       SMPL_REG(sp, PERF_REG_ARM64_SP),
+       SMPL_REG(pc, PERF_REG_ARM64_PC),
        SMPL_REG_END
 };
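
Tables like this pair a register name with a single mask bit so that user input can be folded into the PERF_SAMPLE_REGS bitmask. A hedged sketch of that pattern, assuming SMPL_REG() expands to a {name, 1ULL << id} pair; the register ids and the naive strstr() matching are illustrative (a real parser tokenizes on commas).

    #include <stdio.h>
    #include <string.h>

    struct sample_reg { const char *name; unsigned long long mask; };

    /* Assumed shape of SMPL_REG(): one mask bit per register id. */
    #define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
    #define SMPL_REG_END   { .name = NULL }

    enum { REG_X0, REG_X1, REG_LR = 30, REG_SP }; /* illustrative ids */

    static const struct sample_reg regs[] = {
            SMPL_REG(x0, REG_X0), SMPL_REG(x1, REG_X1),
            SMPL_REG(lr, REG_LR), SMPL_REG(sp, REG_SP),
            SMPL_REG_END
    };

    /* Fold "x0,lr"-style input into a sample_regs bitmask. */
    static unsigned long long parse_regs(const char *want)
    {
            unsigned long long mask = 0;

            for (const struct sample_reg *r = regs; r->name; r++)
                    if (strstr(want, r->name))
                            mask |= r->mask;
            return mask;
    }

    int main(void)
    {
            printf("mask=%#llx\n", parse_regs("x0,lr"));
            return 0;
    }
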
index 1bb8bf6..e4e5f33 100644 (file)
@@ -1 +1 @@
-# empty
+perf-y += util/
diff --git a/tools/perf/arch/mips/annotate/instructions.c b/tools/perf/arch/mips/annotate/instructions.c
new file mode 100644 (file)
index 0000000..340993f
--- /dev/null
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+static
+struct ins_ops *mips__associate_ins_ops(struct arch *arch, const char *name)
+{
+       struct ins_ops *ops = NULL;
+
+       if (!strncmp(name, "bal", 3) ||
+           !strncmp(name, "bgezal", 6) ||
+           !strncmp(name, "bltzal", 6) ||
+           !strncmp(name, "bgtzal", 6) ||
+           !strncmp(name, "blezal", 6) ||
+           !strncmp(name, "beqzal", 6) ||
+           !strncmp(name, "bnezal", 6) ||
+           !strncmp(name, "bgtzl", 5) ||
+           !strncmp(name, "bltzl", 5) ||
+           !strncmp(name, "bgezl", 5) ||
+           !strncmp(name, "blezl", 5) ||
+           !strncmp(name, "jialc", 5) ||
+           !strncmp(name, "beql", 4) ||
+           !strncmp(name, "bnel", 4) ||
+           !strncmp(name, "jal", 3))
+               ops = &call_ops;
+       else if (!strncmp(name, "jr", 2))
+               ops = &ret_ops;
+       else if (name[0] == 'j' || name[0] == 'b')
+               ops = &jump_ops;
+       else
+               return NULL;
+
+       arch__associate_ins_ops(arch, name, ops);
+
+       return ops;
+}
+
+static
+int mips__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+       if (!arch->initialized) {
+               arch->associate_instruction_ops = mips__associate_ins_ops;
+               arch->initialized = true;
+               arch->objdump.comment_char = '#';
+       }
+
+       return 0;
+}
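
The classifier above works purely on mnemonic prefixes, so test order matters: the specific linking branches must be checked before the generic 'j'/'b' fallthrough. A condensed, runnable sketch of that ordering:

    #include <stdio.h>
    #include <string.h>

    enum ins_kind { INS_CALL, INS_RET, INS_JUMP, INS_OTHER };

    /* Condensed version of the prefix tests above; order matters,
     * since "jal" and "jr" would also match the generic 'j' case. */
    static enum ins_kind classify(const char *name)
    {
            if (!strncmp(name, "jal", 3) || !strncmp(name, "bal", 3))
                    return INS_CALL;
            if (!strncmp(name, "jr", 2))
                    return INS_RET;
            if (name[0] == 'j' || name[0] == 'b')
                    return INS_JUMP;
            return INS_OTHER;
    }

    int main(void)
    {
            const char *tests[] = { "jal", "jr", "beq", "addiu" };

            for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
                    printf("%-6s -> %d\n", tests[i], classify(tests[i]));
            return 0;
    }
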
index c41c5af..6a54b94 100644 (file)
@@ -7,7 +7,6 @@ struct test;
 
 /* Tests */
 int test__rdpmc(struct test *test __maybe_unused, int subtest);
-int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
 int test__insn_x86(struct test *test __maybe_unused, int subtest);
 int test__intel_pt_pkt_decoder(struct test *test, int subtest);
 int test__bp_modify(struct test *test, int subtest);
index 2997c50..36d4f24 100644 (file)
@@ -3,6 +3,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 
 perf-y += arch-tests.o
 perf-y += rdpmc.o
-perf-y += perf-time-to-tsc.o
 perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
 perf-$(CONFIG_X86_64) += bp-modify.o
index 6763135..bc25d72 100644 (file)
@@ -8,10 +8,6 @@ struct test arch_tests[] = {
                .desc = "x86 rdpmc",
                .func = test__rdpmc,
        },
-       {
-               .desc = "Convert perf time to TSC",
-               .func = test__perf_time_to_tsc,
-       },
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
        {
                .desc = "DWARF unwind",
index 3ec562a..27dd8cf 100644 (file)
@@ -52,7 +52,7 @@ int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subt
 
        evlist = evlist__new();
        if (!evlist) {
-               pr_debug("perf_evlist__new failed\n");
+               pr_debug("evlist__new failed\n");
                return TEST_FAIL;
        }
 
index 0dc09b5..4a76d49 100644 (file)
@@ -218,7 +218,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
                 * To obtain the auxtrace buffer file descriptor, the auxtrace event
                 * must come first.
                 */
-               perf_evlist__to_front(evlist, intel_bts_evsel);
+               evlist__to_front(evlist, intel_bts_evsel);
                /*
                 * In the case of per-cpu mmaps, we need the CPU on the
                 * AUX event.
@@ -238,7 +238,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
 
                tracking_evsel = evlist__last(evlist);
 
-               perf_evlist__set_tracking_event(evlist, tracking_evsel);
+               evlist__set_tracking_event(evlist, tracking_evsel);
 
                tracking_evsel->core.attr.freq = 0;
                tracking_evsel->core.attr.sample_period = 1;
index 082e5f2..a6420c6 100644 (file)
@@ -416,7 +416,7 @@ static int intel_pt_track_switches(struct evlist *evlist)
        struct evsel *evsel;
        int err;
 
-       if (!perf_evlist__can_select_event(evlist, sched_switch))
+       if (!evlist__can_select_event(evlist, sched_switch))
                return -EPERM;
 
        err = parse_events(evlist, sched_switch, NULL);
@@ -846,7 +846,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                 * To obtain the auxtrace buffer file descriptor, the auxtrace
                 * event must come first.
                 */
-               perf_evlist__to_front(evlist, intel_pt_evsel);
+               evlist__to_front(evlist, intel_pt_evsel);
                /*
                 * In the case of per-cpu mmaps, we need the CPU on the
                 * AUX event.
@@ -865,7 +865,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 
                tracking_evsel = evlist__last(evlist);
 
-               perf_evlist__set_tracking_event(evlist, tracking_evsel);
+               evlist__set_tracking_event(evlist, tracking_evsel);
 
                tracking_evsel->core.attr.freq = 0;
                tracking_evsel->core.attr.sample_period = 1;
index 4940d10..a23ba6b 100644 (file)
@@ -412,7 +412,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
 
        if (dump_trace) {
                perf_session__fprintf_nr_events(session, stdout);
-               perf_evlist__fprintf_nr_events(session->evlist, stdout);
+               evlist__fprintf_nr_events(session->evlist, stdout);
                goto out;
        }
 
@@ -598,7 +598,7 @@ int cmd_annotate(int argc, const char **argv)
                                                      HEADER_BRANCH_STACK);
 
        if (annotate.group_set)
-               perf_evlist__force_leader(annotate.session->evlist);
+               evlist__force_leader(annotate.session->evlist);
 
        ret = symbol__annotation_init();
        if (ret < 0)
index d5bea5d..c5babea 100644 (file)
@@ -369,6 +369,10 @@ static struct perf_c2c c2c = {
                .exit           = perf_event__process_exit,
                .fork           = perf_event__process_fork,
                .lost           = perf_event__process_lost,
+               .attr           = perf_event__process_attr,
+               .auxtrace_info  = perf_event__process_auxtrace_info,
+               .auxtrace       = perf_event__process_auxtrace,
+               .auxtrace_error = perf_event__process_auxtrace_error,
                .ordered_events = true,
                .ordering_requires_timestamps = true,
        },
@@ -2678,6 +2682,12 @@ static int setup_coalesce(const char *coalesce, bool no_source)
 
 static int perf_c2c__report(int argc, const char **argv)
 {
+       struct itrace_synth_opts itrace_synth_opts = {
+               .set = true,
+               .mem = true,    /* Only enable memory events */
+               .default_no_sample = true,
+       };
+
        struct perf_session *session;
        struct ui_progress prog;
        struct perf_data data = {
@@ -2757,6 +2767,8 @@ static int perf_c2c__report(int argc, const char **argv)
                goto out;
        }
 
+       session->itrace_synth_opts = &itrace_synth_opts;
+
        err = setup_nodes(session);
        if (err) {
                pr_err("Failed setup nodes\n");
@@ -2867,6 +2879,7 @@ static int perf_c2c__record(int argc, const char **argv)
        int ret;
        bool all_user = false, all_kernel = false;
        bool event_set = false;
+       struct perf_mem_event *e;
        struct option options[] = {
        OPT_CALLBACK('e', "event", &event_set, "event",
                     "event selector. Use 'perf c2c record -e list' to list available events",
@@ -2894,11 +2907,24 @@ static int perf_c2c__record(int argc, const char **argv)
        rec_argv[i++] = "record";
 
        if (!event_set) {
-               perf_mem_events[PERF_MEM_EVENTS__LOAD].record  = true;
-               perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+               e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE);
+               /*
+                * Both load and store operations are required; use the
+                * combined event PERF_MEM_EVENTS__LOAD_STORE if it is supported.
+                */
+               if (e->tag) {
+                       e->record = true;
+               } else {
+                       e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+                       e->record = true;
+
+                       e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
+                       e->record = true;
+               }
        }
 
-       if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+       e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+       if (e->record)
                rec_argv[i++] = "-W";
 
        rec_argv[i++] = "-d";
@@ -2906,12 +2932,13 @@ static int perf_c2c__record(int argc, const char **argv)
        rec_argv[i++] = "--sample-cpu";
 
        for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-               if (!perf_mem_events[j].record)
+               e = perf_mem_events__ptr(j);
+               if (!e->record)
                        continue;
 
-               if (!perf_mem_events[j].supported) {
+               if (!e->supported) {
                        pr_err("failed: event '%s' not supported\n",
-                              perf_mem_events[j].name);
+                              perf_mem_events__name(j));
                        free(rec_argv);
                        return -1;
                }
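
Both perf c2c and perf mem now prefer a combined load+store event when the architecture advertises one (a non-NULL .tag, e.g. Arm SPE's spe-ldst above) and fall back to separate load and store events otherwise. A self-contained sketch of that selection; the event slots and tags are illustrative.

    #include <stdbool.h>
    #include <stdio.h>

    enum { EV_LOAD, EV_STORE, EV_LOAD_STORE, EV_MAX };

    struct mem_event { const char *tag; bool record; };

    /* Set the EV_LOAD_STORE tag to NULL to exercise the fallback path. */
    static struct mem_event events[EV_MAX] = {
            [EV_LOAD]       = { .tag = "load"  },
            [EV_STORE]      = { .tag = "store" },
            [EV_LOAD_STORE] = { .tag = "ldst"  },
    };

    static void select_events(void)
    {
            if (events[EV_LOAD_STORE].tag) {
                    events[EV_LOAD_STORE].record = true;    /* combined event */
            } else {
                    events[EV_LOAD].record = true;          /* separate events */
                    events[EV_STORE].record = true;
            }
    }

    int main(void)
    {
            select_events();
            for (int i = 0; i < EV_MAX; i++)
                    printf("event %d: record=%d\n", i, events[i].record);
            return 0;
    }
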
index cefc715..8f6c784 100644 (file)
@@ -494,7 +494,7 @@ static struct evsel *evsel_match(struct evsel *evsel,
        return NULL;
 }
 
-static void perf_evlist__collapse_resort(struct evlist *evlist)
+static void evlist__collapse_resort(struct evlist *evlist)
 {
        struct evsel *evsel;
 
@@ -1214,7 +1214,7 @@ static int __cmd_diff(void)
                        goto out_delete;
                }
 
-               perf_evlist__collapse_resort(d->session->evlist);
+               evlist__collapse_resort(d->session->evlist);
 
                if (pdiff.ptime_range)
                        zfree(&pdiff.ptime_range);
index 98e9928..4617b32 100644 (file)
 #include "util/data.h"
 #include "util/debug.h"
 #include <linux/err.h>
+#include "util/tool.h"
+
+static int process_header_feature(struct perf_session *session __maybe_unused,
+                                 union perf_event *event __maybe_unused)
+{
+       session_done = 1;
+       return 0;
+}
 
 static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
 {
@@ -27,12 +35,20 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
                .mode      = PERF_DATA_MODE_READ,
                .force     = details->force,
        };
+       struct perf_tool tool = {
+               /* only needed for pipe mode */
+               .attr = perf_event__process_attr,
+               .feature = process_header_feature,
+       };
        bool has_tracepoint = false;
 
-       session = perf_session__new(&data, 0, NULL);
+       session = perf_session__new(&data, 0, &tool);
        if (IS_ERR(session))
                return PTR_ERR(session);
 
+       if (data.is_pipe)
+               perf_session__process_events(session);
+
        evlist__for_each_entry(session->evlist, pos) {
                evsel__fprintf(pos, details, stdout);
 
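
In pipe mode there is no file header to read up front, so event descriptions arrive as in-band records; the feature callback above sets session_done once PERF_RECORD_HEADER_FEATURE is seen, stopping the processing loop early. A toy model of that control flow (the record types and stream are made up):

    #include <stdbool.h>
    #include <stdio.h>

    enum rec_type { REC_ATTR, REC_HEADER_FEATURE, REC_SAMPLE };

    static bool session_done;

    static void process(enum rec_type t)
    {
            switch (t) {
            case REC_ATTR:
                    printf("collected one event description\n");
                    break;
            case REC_HEADER_FEATURE:
                    session_done = true;    /* headers seen, stop early */
                    break;
            default:
                    break;
            }
    }

    int main(void)
    {
            enum rec_type stream[] = { REC_ATTR, REC_ATTR,
                                       REC_HEADER_FEATURE, REC_SAMPLE };

            for (unsigned i = 0;
                 i < sizeof(stream) / sizeof(stream[0]) && !session_done; i++)
                    process(stream[i]);
            return 0;
    }
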
index 9366fad..d49448a 100644 (file)
@@ -67,7 +67,7 @@ static void sig_handler(int sig __maybe_unused)
 }
 
 /*
- * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
+ * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
  * we asked by setting its exec_error to the function below,
  * ftrace__workload_exec_failed_signal.
  *
@@ -600,9 +600,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
        if (write_tracing_file("trace", "0") < 0)
                goto out;
 
-       if (argc && perf_evlist__prepare_workload(ftrace->evlist,
-                               &ftrace->target, argv, false,
-                               ftrace__workload_exec_failed_signal) < 0) {
+       if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false,
+                                            ftrace__workload_exec_failed_signal) < 0) {
                goto out;
        }
 
@@ -644,7 +643,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
                }
        }
 
-       perf_evlist__start_workload(ftrace->evlist);
+       evlist__start_workload(ftrace->evlist);
 
        if (ftrace->initial_delay) {
                usleep(ftrace->initial_delay * 1000);
@@ -958,7 +957,7 @@ int cmd_ftrace(int argc, const char **argv)
                goto out_delete_filters;
        }
 
-       ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target);
+       ret = evlist__create_maps(ftrace.evlist, &ftrace.target);
        if (ret < 0)
                goto out_delete_evlist;
 
index 0462dc8..43937f4 100644 (file)
@@ -843,10 +843,12 @@ int cmd_inject(int argc, const char **argv)
                .output = {
                        .path = "-",
                        .mode = PERF_DATA_MODE_WRITE,
+                       .use_stdio = true,
                },
        };
        struct perf_data data = {
                .mode = PERF_DATA_MODE_READ,
+               .use_stdio = true,
        };
        int ret;
 
index a50dae2..0062445 100644 (file)
@@ -1960,18 +1960,15 @@ int cmd_kmem(int argc, const char **argv)
        ret = -1;
 
        if (kmem_slab) {
-               if (!perf_evlist__find_tracepoint_by_name(session->evlist,
-                                                         "kmem:kmalloc")) {
+               if (!evlist__find_tracepoint_by_name(session->evlist, "kmem:kmalloc")) {
                        pr_err(errmsg, "slab", "slab");
                        goto out_delete;
                }
        }
 
        if (kmem_page) {
-               struct evsel *evsel;
+               struct evsel *evsel = evlist__find_tracepoint_by_name(session->evlist, "kmem:mm_page_alloc");
 
-               evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
-                                                            "kmem:mm_page_alloc");
                if (evsel == NULL) {
                        pr_err(errmsg, "page", "page");
                        goto out_delete;
index 460945d..1105c9e 100644 (file)
@@ -764,7 +764,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
                return (err == -EAGAIN) ? 0 : -1;
 
        while ((event = perf_mmap__read_event(&md->core)) != NULL) {
-               err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+               err = evlist__parse_sample_timestamp(evlist, event, &timestamp);
                if (err) {
                        perf_mmap__consume(&md->core);
                        pr_err("Failed to parse sample\n");
@@ -1022,7 +1022,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
        struct evlist *evlist = kvm->evlist;
        char sbuf[STRERR_BUFSIZE];
 
-       perf_evlist__config(evlist, &kvm->opts, NULL);
+       evlist__config(evlist, &kvm->opts, NULL);
 
        /*
         * Note: exclude_{guest,host} do not apply here.
@@ -1349,8 +1349,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
                OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
                        "record events on existing process id"),
                OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages",
-                       "number of mmap data pages",
-                       perf_evlist__parse_mmap_pages),
+                       "number of mmap data pages", evlist__parse_mmap_pages),
                OPT_INCR('v', "verbose", &verbose,
                        "be more verbose (show counter open errors, etc)"),
                OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
@@ -1442,7 +1441,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
                goto out;
        }
 
-       if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
+       if (evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
                usage_with_options(live_usage, live_options);
 
        /*
index 3523279..8237420 100644 (file)
@@ -7,6 +7,7 @@
 #include "perf.h"
 
 #include <subcmd/parse-options.h>
+#include "util/auxtrace.h"
 #include "util/trace-event.h"
 #include "util/tool.h"
 #include "util/session.h"
@@ -64,6 +65,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
        const char **rec_argv;
        int ret;
        bool all_user = false, all_kernel = false;
+       struct perf_mem_event *e;
        struct option options[] = {
        OPT_CALLBACK('e', "event", &mem, "event",
                     "event selector. use 'perf mem record -e list' to list available events",
@@ -76,6 +78,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
        OPT_END()
        };
 
+       if (perf_mem_events__init()) {
+               pr_err("failed: memory events not supported\n");
+               return -1;
+       }
+
        argc = parse_options(argc, argv, options, record_mem_usage,
                             PARSE_OPT_KEEP_UNKNOWN);
 
@@ -86,13 +93,30 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 
        rec_argv[i++] = "record";
 
-       if (mem->operation & MEM_OPERATION_LOAD)
-               perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+       e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE);
 
-       if (mem->operation & MEM_OPERATION_STORE)
-               perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+       /*
+        * If both load and store operations are requested, use the combined
+        * event PERF_MEM_EVENTS__LOAD_STORE when it is supported.
+        */
+       if (e->tag &&
+           (mem->operation & MEM_OPERATION_LOAD) &&
+           (mem->operation & MEM_OPERATION_STORE)) {
+               e->record = true;
+       } else {
+               if (mem->operation & MEM_OPERATION_LOAD) {
+                       e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+                       e->record = true;
+               }
 
-       if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+               if (mem->operation & MEM_OPERATION_STORE) {
+                       e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
+                       e->record = true;
+               }
+       }
+
+       e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+       if (e->record)
                rec_argv[i++] = "-W";
 
        rec_argv[i++] = "-d";
@@ -101,10 +125,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
                rec_argv[i++] = "--phys-data";
 
        for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-               if (!perf_mem_events[j].record)
+               e = perf_mem_events__ptr(j);
+               if (!e->record)
                        continue;
 
-               if (!perf_mem_events[j].supported) {
+               if (!e->supported) {
                        pr_err("failed: event '%s' not supported\n",
                               perf_mem_events__name(j));
                        free(rec_argv);
@@ -231,6 +256,12 @@ static int process_sample_event(struct perf_tool *tool,
 
 static int report_raw_events(struct perf_mem *mem)
 {
+       struct itrace_synth_opts itrace_synth_opts = {
+               .set = true,
+               .mem = true,    /* Only enable memory events */
+               .default_no_sample = true,
+       };
+
        struct perf_data data = {
                .path  = input_name,
                .mode  = PERF_DATA_MODE_READ,
@@ -243,6 +274,8 @@ static int report_raw_events(struct perf_mem *mem)
        if (IS_ERR(session))
                return PTR_ERR(session);
 
+       session->itrace_synth_opts = &itrace_synth_opts;
+
        if (mem->cpu_list) {
                ret = perf_session__cpu_bitmap(session, mem->cpu_list,
                                               mem->cpu_bitmap);
@@ -265,11 +298,35 @@ out_delete:
        perf_session__delete(session);
        return ret;
 }
+static char *get_sort_order(struct perf_mem *mem)
+{
+       bool has_extra_options = mem->phys_addr ? true : false;
+       char sort[128];
+
+       /*
+        * There is no weight (cost) associated with stores, so don't print
+        * the column.
+        */
+       if (!(mem->operation & MEM_OPERATION_LOAD)) {
+               strcpy(sort, "--sort=mem,sym,dso,symbol_daddr,"
+                            "dso_daddr,tlb,locked");
+       } else if (has_extra_options) {
+               strcpy(sort, "--sort=local_weight,mem,sym,dso,symbol_daddr,"
+                            "dso_daddr,snoop,tlb,locked");
+       } else
+               return NULL;
+
+       if (mem->phys_addr)
+               strcat(sort, ",phys_daddr");
+
+       return strdup(sort);
+}
 
 static int report_events(int argc, const char **argv, struct perf_mem *mem)
 {
        const char **rep_argv;
        int ret, i = 0, j, rep_argc;
+       char *new_sort_order;
 
        if (mem->dump_raw)
                return report_raw_events(mem);
@@ -283,20 +340,9 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
        rep_argv[i++] = "--mem-mode";
        rep_argv[i++] = "-n"; /* display number of samples */
 
-       /*
-        * there is no weight (cost) associated with stores, so don't print
-        * the column
-        */
-       if (!(mem->operation & MEM_OPERATION_LOAD)) {
-               if (mem->phys_addr)
-                       rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
-                                       "dso_daddr,tlb,locked,phys_daddr";
-               else
-                       rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
-                                       "dso_daddr,tlb,locked";
-       } else if (mem->phys_addr)
-               rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr,"
-                               "dso_daddr,snoop,tlb,locked,phys_daddr";
+       new_sort_order = get_sort_order(mem);
+       if (new_sort_order)
+               rep_argv[i++] = new_sort_order;
 
        for (j = 1; j < argc; j++, i++)
                rep_argv[i] = argv[j];
@@ -386,8 +432,12 @@ int cmd_mem(int argc, const char **argv)
                        .comm           = perf_event__process_comm,
                        .lost           = perf_event__process_lost,
                        .fork           = perf_event__process_fork,
+                       .attr           = perf_event__process_attr,
                        .build_id       = perf_event__process_build_id,
                        .namespaces     = perf_event__process_namespaces,
+                       .auxtrace_info  = perf_event__process_auxtrace_info,
+                       .auxtrace       = perf_event__process_auxtrace,
+                       .auxtrace_error = perf_event__process_auxtrace_error,
                        .ordered_events = true,
                },
                .input_name              = "perf.data",
@@ -422,11 +472,6 @@ int cmd_mem(int argc, const char **argv)
                NULL
        };
 
-       if (perf_mem_events__init()) {
-               pr_err("failed: memory events not supported\n");
-               return -1;
-       }
-
        argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
                                        mem_usage, PARSE_OPT_KEEP_UNKNOWN);
 
index adf311d..fd39116 100644 (file)
@@ -891,13 +891,13 @@ static int record__open(struct record *rec)
         * event synthesis.
         */
        if (opts->initial_delay || target__has_cpu(&opts->target)) {
-               pos = perf_evlist__get_tracking_event(evlist);
+               pos = evlist__get_tracking_event(evlist);
                if (!evsel__is_dummy_event(pos)) {
                        /* Set up dummy event. */
                        if (evlist__add_dummy(evlist))
                                return -ENOMEM;
                        pos = evlist__last(evlist);
-                       perf_evlist__set_tracking_event(evlist, pos);
+                       evlist__set_tracking_event(evlist, pos);
                }
 
                /*
@@ -910,7 +910,7 @@ static int record__open(struct record *rec)
                        pos->immediate = 1;
        }
 
-       perf_evlist__config(evlist, opts, &callchain_param);
+       evlist__config(evlist, opts, &callchain_param);
 
        evlist__for_each_entry(evlist, pos) {
 try_again:
@@ -923,7 +923,7 @@ try_again:
                        if ((errno == EINVAL || errno == EBADF) &&
                            pos->leader != pos &&
                            pos->weak_group) {
-                               pos = perf_evlist__reset_weak_group(evlist, pos, true);
+                               pos = evlist__reset_weak_group(evlist, pos, true);
                                goto try_again;
                        }
                        rc = -errno;
@@ -935,7 +935,7 @@ try_again:
                pos->supported = true;
        }
 
-       if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
+       if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
                pr_warning(
 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
@@ -946,7 +946,7 @@ try_again:
 "even with a suitable vmlinux or kallsyms file.\n\n");
        }
 
-       if (perf_evlist__apply_filters(evlist, &pos)) {
+       if (evlist__apply_filters(evlist, &pos)) {
                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        pos->filter, evsel__name(pos), errno,
                        str_error_r(errno, msg, sizeof(msg)));
@@ -1166,7 +1166,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
                rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
 
        if (overwrite)
-               perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+               evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
 out:
        return rc;
 }
@@ -1333,7 +1333,7 @@ record__switch_output(struct record *rec, bool at_exit)
 static volatile int workload_exec_errno;
 
 /*
- * perf_evlist__prepare_workload will send a SIGUSR1
+ * evlist__prepare_workload will send a SIGUSR1
  * if the fork fails, since we asked by setting its
  * want_signal to true.
  */
@@ -1349,8 +1349,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 static void snapshot_sig_handler(int sig);
 static void alarm_sig_handler(int sig);
 
-static const struct perf_event_mmap_page *
-perf_evlist__pick_pc(struct evlist *evlist)
+static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
 {
        if (evlist) {
                if (evlist->mmap && evlist->mmap[0].core.base)
@@ -1363,9 +1362,7 @@ perf_evlist__pick_pc(struct evlist *evlist)
 
 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
 {
-       const struct perf_event_mmap_page *pc;
-
-       pc = perf_evlist__pick_pc(rec->evlist);
+       const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
        if (pc)
                return pc;
        return NULL;
@@ -1444,7 +1441,7 @@ static int record__synthesize(struct record *rec, bool tail)
                        goto out;
        }
 
-       if (!perf_evlist__exclude_kernel(rec->evlist)) {
+       if (!evlist__exclude_kernel(rec->evlist)) {
                err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                         machine);
                WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
@@ -1548,7 +1545,7 @@ static int record__setup_sb_evlist(struct record *rec)
                }
        }
 #endif
-       if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
+       if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
                pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
                opts->no_bpf_event = true;
        }
@@ -1689,9 +1686,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
        record__init_features(rec);
 
        if (forks) {
-               err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
-                                                   argv, data->is_pipe,
-                                                   workload_exec_failed_signal);
+               err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
+                                              workload_exec_failed_signal);
                if (err < 0) {
                        pr_err("Couldn't run the workload!\n");
                        status = err;
@@ -1835,7 +1831,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                                                  machine);
                free(event);
 
-               perf_evlist__start_workload(rec->evlist);
+               evlist__start_workload(rec->evlist);
        }
 
        if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack))
@@ -1861,11 +1857,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                 * BKW_MMAP_EMPTY here: when done == true and
                 * hits != rec->samples in previous round.
                 *
-                * perf_evlist__toggle_bkw_mmap ensure we never
+                * evlist__toggle_bkw_mmap ensures we never
                 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
                 */
                if (trigger_is_hit(&switch_output_trigger) || done || draining)
-                       perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
+                       evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
 
                if (record__mmap_read_all(rec, false) < 0) {
                        trigger_error(&auxtrace_snapshot_trigger);
@@ -1904,7 +1900,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                         * record__mmap_read_all(): we should have collected
                         * data from it.
                         */
-                       perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
+                       evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
 
                        if (!quiet)
                                fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
@@ -2066,7 +2062,7 @@ out_delete_session:
        perf_session__delete(session);
 
        if (!opts->no_bpf_event)
-               perf_evlist__stop_sb_thread(rec->sb_evlist);
+               evlist__stop_sb_thread(rec->sb_evlist);
        return status;
 }
 
@@ -2222,7 +2218,7 @@ static int record__parse_mmap_pages(const struct option *opt,
                *p = '\0';
 
        if (*s) {
-               ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
+               ret = __evlist__parse_mmap_pages(&mmap_pages, s);
                if (ret)
                        goto out_free;
                opts->mmap_pages = mmap_pages;
@@ -2233,7 +2229,7 @@ static int record__parse_mmap_pages(const struct option *opt,
                goto out_free;
        }
 
-       ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
+       ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
        if (ret)
                goto out_free;
 
@@ -2413,7 +2409,7 @@ static bool dry_run;
  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
  * with it and switch to use the library functions in perf_evlist that came
  * from builtin-record.c, i.e. use record_opts,
- * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
+ * evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
  * using pipes, etc.
  */
 static struct option __record_options[] = {
@@ -2476,6 +2472,8 @@ static struct option __record_options[] = {
        OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
        OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
                    "Record the sample physical addresses"),
+       OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
+                   "Record the sampled data address data page size"),
        OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
        OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
                        &record.opts.sample_time_set,
@@ -2793,7 +2791,7 @@ int cmd_record(int argc, const char **argv)
        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
 
        err = -ENOMEM;
-       if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
+       if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);
 
        err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
index 3c74c9c..2a845d6 100644 (file)
@@ -211,7 +211,7 @@ static void setup_forced_leader(struct report *report,
                                struct evlist *evlist)
 {
        if (report->group_set)
-               perf_evlist__force_leader(evlist);
+               evlist__force_leader(evlist);
 }
 
 static int process_feature_event(struct perf_session *session,
@@ -226,6 +226,8 @@ static int process_feature_event(struct perf_session *session,
                pr_err("failed: wrong feature ID: %" PRI_lu64 "\n",
                       event->feat.feat_id);
                return -1;
+       } else if (rep->header_only) {
+               session_done = 1;
        }
 
        /*
@@ -493,8 +495,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
        return ret + fprintf(fp, "\n#\n");
 }
 
-static int perf_evlist__tui_block_hists_browse(struct evlist *evlist,
-                                              struct report *rep)
+static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report *rep)
 {
        struct evsel *pos;
        int i = 0, ret;
@@ -511,9 +512,7 @@ static int perf_evlist__tui_block_hists_browse(struct evlist *evlist,
        return 0;
 }
 
-static int perf_evlist__tty_browse_hists(struct evlist *evlist,
-                                        struct report *rep,
-                                        const char *help)
+static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, const char *help)
 {
        struct evsel *pos;
        int i = 0;
@@ -566,7 +565,7 @@ static void report__warn_kptr_restrict(const struct report *rep)
        struct map *kernel_map = machine__kernel_map(&rep->session->machines.host);
        struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL;
 
-       if (perf_evlist__exclude_kernel(rep->session->evlist))
+       if (evlist__exclude_kernel(rep->session->evlist))
                return;
 
        if (kernel_map == NULL ||
@@ -595,7 +594,7 @@ static int report__gtk_browse_hists(struct report *rep, const char *help)
        int (*hist_browser)(struct evlist *evlist, const char *help,
                            struct hist_browser_timer *timer, float min_pcnt);
 
-       hist_browser = dlsym(perf_gtk_handle, "perf_evlist__gtk_browse_hists");
+       hist_browser = dlsym(perf_gtk_handle, "evlist__gtk_browse_hists");
 
        if (hist_browser == NULL) {
                ui__error("GTK browser not found!\n");
@@ -622,14 +621,12 @@ static int report__browse_hists(struct report *rep)
        switch (use_browser) {
        case 1:
                if (rep->total_cycles_mode) {
-                       ret = perf_evlist__tui_block_hists_browse(evlist, rep);
+                       ret = evlist__tui_block_hists_browse(evlist, rep);
                        break;
                }
 
-               ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
-                                                   rep->min_percent,
-                                                   &session->header.env,
-                                                   true, &rep->annotation_opts);
+               ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent,
+                                              &session->header.env, true, &rep->annotation_opts);
                /*
                 * Usually "ret" is the last pressed key, and we only
                 * care if the key notifies us to switch data file.
@@ -641,7 +638,7 @@ static int report__browse_hists(struct report *rep)
                ret = report__gtk_browse_hists(rep, help);
                break;
        default:
-               ret = perf_evlist__tty_browse_hists(evlist, rep, help);
+               ret = evlist__tty_browse_hists(evlist, rep, help);
                break;
        }
 
@@ -933,7 +930,7 @@ static int __cmd_report(struct report *rep)
 
                if (dump_trace) {
                        perf_session__fprintf_nr_events(session, stdout);
-                       perf_evlist__fprintf_nr_events(session->evlist, stdout);
+                       evlist__fprintf_nr_events(session->evlist, stdout);
                        return 0;
                }
        }
@@ -1517,6 +1514,13 @@ repeat:
                perf_session__fprintf_info(session, stdout,
                                           report.show_full_info);
                if (report.header_only) {
+                       if (data.is_pipe) {
+                               /*
+                                * We need to process the first few records,
+                                * which contain PERF_RECORD_HEADER_FEATURE.
+                                */
+                               perf_session__process_events(session);
+                       }
                        ret = 0;
                        goto error;
                }
index 0e16f9d..69c769b 100644 (file)
@@ -3036,8 +3036,7 @@ static int perf_sched__timehist(struct perf_sched *sched)
        setup_pager();
 
        /* prefer sched_waking if it is captured */
-       if (perf_evlist__find_tracepoint_by_name(session->evlist,
-                                                 "sched:sched_waking"))
+       if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking"))
                handlers[1].handler = timehist_sched_wakeup_ignore;
 
        /* setup per-evsel handlers */
@@ -3045,8 +3044,7 @@ static int perf_sched__timehist(struct perf_sched *sched)
                goto out;
 
        /* sched_switch event at a minimum needs to exist */
-       if (!perf_evlist__find_tracepoint_by_name(session->evlist,
-                                                 "sched:sched_switch")) {
+       if (!evlist__find_tracepoint_by_name(session->evlist, "sched:sched_switch")) {
                pr_err("No sched_switch events found. Have you run 'perf sched record'?\n");
                goto out;
        }
index 48588cc..edacfa9 100644 (file)
@@ -30,6 +30,7 @@
 #include "util/thread-stack.h"
 #include "util/time-utils.h"
 #include "util/path.h"
+#include "util/event.h"
 #include "ui/ui.h"
 #include "print_binary.h"
 #include "archinsn.h"
@@ -115,6 +116,7 @@ enum perf_output_field {
        PERF_OUTPUT_SRCCODE         = 1ULL << 30,
        PERF_OUTPUT_IPC             = 1ULL << 31,
        PERF_OUTPUT_TOD             = 1ULL << 32,
+       PERF_OUTPUT_DATA_PAGE_SIZE  = 1ULL << 33,
 };
 
 struct perf_script {
@@ -179,6 +181,7 @@ struct output_option {
        {.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
        {.str = "ipc", .field = PERF_OUTPUT_IPC},
        {.str = "tod", .field = PERF_OUTPUT_TOD},
+       {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE},
 };
 
 enum {
@@ -251,7 +254,8 @@ static struct {
                              PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
                              PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
                              PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
-                             PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR,
+                             PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR |
+                             PERF_OUTPUT_DATA_PAGE_SIZE,
 
                .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
        },
@@ -499,6 +503,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
            evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR))
                return -EINVAL;
 
+       if (PRINT_FIELD(DATA_PAGE_SIZE) &&
+           evsel__check_stype(evsel, PERF_SAMPLE_DATA_PAGE_SIZE, "DATA_PAGE_SIZE", PERF_OUTPUT_DATA_PAGE_SIZE))
+               return -EINVAL;
+
        return 0;
 }
 
@@ -1847,7 +1855,7 @@ static void perf_sample__fprint_metric(struct perf_script *script,
        u64 val;
 
        if (!evsel->stats)
-               perf_evlist__alloc_stats(script->session->evlist, false);
+               evlist__alloc_stats(script->session->evlist, false);
        if (evsel_script(evsel->leader)->gnum++ == 0)
                perf_stat__reset_shadow_stats();
        val = sample->period * evsel->scale;
@@ -1920,6 +1928,7 @@ static void process_event(struct perf_script *script,
        unsigned int type = output_type(attr->type);
        struct evsel_script *es = evsel->priv;
        FILE *fp = es->fp;
+       char str[PAGE_SIZE_NAME_LEN];
 
        if (output[type].fields == 0)
                return;
@@ -2008,6 +2017,9 @@ static void process_event(struct perf_script *script,
        if (PRINT_FIELD(PHYS_ADDR))
                fprintf(fp, "%16" PRIx64, sample->phys_addr);
 
+       if (PRINT_FIELD(DATA_PAGE_SIZE))
+               fprintf(fp, " %s", get_page_size_name(sample->data_page_size, str));
+
        perf_sample__fprintf_ipc(sample, attr, fp);
 
        fprintf(fp, "\n");
@@ -2224,7 +2236,7 @@ static int print_event_with_time(struct perf_tool *tool,
 {
        struct perf_script *script = container_of(tool, struct perf_script, tool);
        struct perf_session *session = script->session;
-       struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+       struct evsel *evsel = evlist__id2evsel(session->evlist, sample->id);
        struct thread *thread = NULL;
 
        if (evsel && !evsel->core.attr.sample_id_all) {
@@ -3308,7 +3320,7 @@ static int set_maps(struct perf_script *script)
 
        perf_evlist__set_maps(&evlist->core, script->cpus, script->threads);
 
-       if (perf_evlist__alloc_stats(evlist, true))
+       if (evlist__alloc_stats(evlist, true))
                return -ENOMEM;
 
        script->allocated = true;
@@ -3506,7 +3518,8 @@ int cmd_script(int argc, const char **argv)
                     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
                     "addr,symoff,srcline,period,iregs,uregs,brstack,"
                     "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
-                    "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod",
+                    "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod,"
+                    "data_page_size",
                     parse_output_fields),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                    "system-wide collection from all CPUs"),
@@ -3935,7 +3948,7 @@ out_delete:
                zfree(&script.ptime_range);
        }
 
-       perf_evlist__free_stats(session->evlist);
+       evlist__free_stats(session->evlist);
        perf_session__delete(session);
 
        if (script_started)
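
Wiring up a new perf script field such as data_page_size follows a fixed pattern visible above: reserve a bit in the output mask, map a field name to it, validate the sample type, then print when the bit is set. A stripped-down sketch of the bitmask plumbing; the names, bit positions, and naive matching are illustrative (the real parser tokenizes the spec on commas).

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* One bit per printable field, as in perf script's output mask. */
    #define OUT_ADDR           (1ULL << 0)
    #define OUT_PHYS_ADDR      (1ULL << 1)
    #define OUT_DATA_PAGE_SIZE (1ULL << 2)  /* the newly wired-up field */

    struct output_option { const char *str; uint64_t field; };

    static const struct output_option options[] = {
            { "addr",           OUT_ADDR },
            { "phys_addr",      OUT_PHYS_ADDR },
            { "data_page_size", OUT_DATA_PAGE_SIZE },
    };

    static uint64_t parse_fields(const char *spec)
    {
            uint64_t fields = 0;

            for (unsigned i = 0; i < sizeof(options) / sizeof(options[0]); i++)
                    if (strstr(spec, options[i].str))
                            fields |= options[i].field;
            return fields;
    }

    int main(void)
    {
            uint64_t fields = parse_fields("addr,data_page_size");

            if (fields & OUT_DATA_PAGE_SIZE)
                    printf("would print the data page size column\n");
            return 0;
    }
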
index b01af17..89c3269 100644 (file)
@@ -270,7 +270,7 @@ static void perf_stat__reset_stats(void)
 {
        int i;
 
-       perf_evlist__reset_stats(evsel_list);
+       evlist__reset_stats(evsel_list);
        perf_stat__reset_shadow_stats();
 
        for (i = 0; i < stat_config.stats_num; i++)
@@ -534,7 +534,7 @@ static void disable_counters(void)
 static volatile int workload_exec_errno;
 
 /*
- * perf_evlist__prepare_workload will send a SIGUSR1
+ * evlist__prepare_workload will send a SIGUSR1
  * if the fork fails, since we asked by setting its
  * want_signal to true.
  */
@@ -724,8 +724,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
        bool second_pass = false;
 
        if (forks) {
-               if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
-                                                 workload_exec_failed_signal) < 0) {
+               if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) {
                        perror("failed to prepare workload");
                        return -1;
                }
@@ -733,7 +732,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
        }
 
        if (group)
-               perf_evlist__set_leader(evsel_list);
+               evlist__set_leader(evsel_list);
 
        if (affinity__setup(&affinity) < 0)
                return -1;
@@ -760,7 +759,7 @@ try_again:
                                if ((errno == EINVAL || errno == EBADF) &&
                                    counter->leader != counter &&
                                    counter->weak_group) {
-                                       perf_evlist__reset_weak_group(evsel_list, counter, false);
+                                       evlist__reset_weak_group(evsel_list, counter, false);
                                        assert(counter->reset_group);
                                        second_pass = true;
                                        continue;
@@ -843,7 +842,7 @@ try_again_reset:
                        return -1;
        }
 
-       if (perf_evlist__apply_filters(evsel_list, &counter)) {
+       if (evlist__apply_filters(evsel_list, &counter)) {
                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
                        counter->filter, evsel__name(counter), errno,
                        str_error_r(errno, msg, sizeof(msg)));
@@ -876,7 +875,7 @@ try_again_reset:
        clock_gettime(CLOCK_MONOTONIC, &ref_time);
 
        if (forks) {
-               perf_evlist__start_workload(evsel_list);
+               evlist__start_workload(evsel_list);
                enable_counters();
 
                if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
@@ -914,10 +913,10 @@ try_again_reset:
                update_stats(&walltime_nsecs_stats, t1 - t0);
 
                if (stat_config.aggr_mode == AGGR_GLOBAL)
-                       perf_evlist__save_aggr_prev_raw_counts(evsel_list);
+                       evlist__save_aggr_prev_raw_counts(evsel_list);
 
-               perf_evlist__copy_prev_raw_counts(evsel_list);
-               perf_evlist__reset_prev_raw_counts(evsel_list);
+               evlist__copy_prev_raw_counts(evsel_list);
+               evlist__reset_prev_raw_counts(evsel_list);
                runtime_stat_reset(&stat_config);
                perf_stat__reset_shadow_per_stat(&rt_stat);
        } else
@@ -972,9 +971,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
        /* Do not print anything if we record to the pipe. */
        if (STAT_RECORD && perf_stat.data.is_pipe)
                return;
+       if (stat_config.quiet)
+               return;
 
-       perf_evlist__print_counters(evsel_list, &stat_config, &target,
-                                   ts, argc, argv);
+       evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
 }
 
 static volatile int signr = -1;
@@ -1171,6 +1171,8 @@ static struct option stat_options[] = {
                    "threads of same physical core"),
        OPT_BOOLEAN(0, "summary", &stat_config.summary,
                       "print summary for interval mode"),
+       OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
+                       "don't print output (useful with record)"),
 #ifdef HAVE_LIBPFM
        OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
                "libpfm4 event selector. use 'perf list' to list available events",
@@ -1904,7 +1906,7 @@ static int set_maps(struct perf_stat *st)
 
        perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
 
-       if (perf_evlist__alloc_stats(evsel_list, true))
+       if (evlist__alloc_stats(evsel_list, true))
                return -ENOMEM;
 
        st->maps_allocated = true;
@@ -2132,7 +2134,7 @@ int cmd_stat(int argc, const char **argv)
                goto out;
        }
 
-       if (!output) {
+       if (!output && !stat_config.quiet) {
                struct timespec tm;
                mode = append_file ? "a" : "w";
 
@@ -2235,8 +2237,11 @@ int cmd_stat(int argc, const char **argv)
                }
 
                if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list,
-                                         &stat_config.metric_events, true) < 0)
+                                         &stat_config.metric_events, true) < 0) {
+                       parse_options_usage(stat_usage, stat_options,
+                                           "for-each-cgroup", 0);
                        goto out;
+               }
        }
 
        target__validate(&target);
@@ -2244,7 +2249,7 @@ int cmd_stat(int argc, const char **argv)
        if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
                target.per_thread = true;
 
-       if (perf_evlist__create_maps(evsel_list, &target) < 0) {
+       if (evlist__create_maps(evsel_list, &target) < 0) {
                if (target__has_task(&target)) {
                        pr_err("Problems finding threads of monitor\n");
                        parse_options_usage(stat_usage, stat_options, "p", 1);
@@ -2303,7 +2308,7 @@ int cmd_stat(int argc, const char **argv)
                goto out;
        }
 
-       if (perf_evlist__alloc_stats(evsel_list, interval))
+       if (evlist__alloc_stats(evsel_list, interval))
                goto out;
 
        if (perf_stat_init_aggr_mode())
@@ -2343,7 +2348,7 @@ int cmd_stat(int argc, const char **argv)
                                run_idx + 1);
 
                if (run_idx != 0)
-                       perf_evlist__reset_prev_raw_counts(evsel_list);
+                       evlist__reset_prev_raw_counts(evsel_list);
 
                status = run_perf_stat(argc, argv, run_idx);
                if (forever && status != -1 && !interval) {
@@ -2394,7 +2399,7 @@ int cmd_stat(int argc, const char **argv)
        }
 
        perf_stat__exit_aggr_mode();
-       perf_evlist__free_stats(evsel_list);
+       evlist__free_stats(evsel_list);
 out:
        zfree(&stat_config.walltime_run);
 
index 7c64134..3673c04 100644 (file)
@@ -641,12 +641,9 @@ repeat:
                hists->uid_filter_str = top->record_opts.target.uid_str;
        }
 
-       ret = perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
-                                     top->min_percent,
-                                     &top->session->header.env,
-                                     !top->record_opts.overwrite,
-                                     &top->annotation_opts);
-
+       ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent,
+                                      &top->session->header.env, !top->record_opts.overwrite,
+                                      &top->annotation_opts);
        if (ret == K_RELOAD) {
                top->zero = true;
                goto repeat;
@@ -782,7 +779,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
        if (!machine->kptr_restrict_warned &&
            symbol_conf.kptr_restrict &&
            al.cpumode == PERF_RECORD_MISC_KERNEL) {
-               if (!perf_evlist__exclude_kernel(top->session->evlist)) {
+               if (!evlist__exclude_kernel(top->session->evlist)) {
                        ui__warning(
 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
 "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
@@ -890,7 +887,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
        while ((event = perf_mmap__read_event(&md->core)) != NULL) {
                int ret;
 
-               ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
+               ret = evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
                if (ret && ret != -1)
                        break;
 
@@ -918,14 +915,14 @@ static void perf_top__mmap_read(struct perf_top *top)
        int i;
 
        if (overwrite)
-               perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
+               evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
 
        for (i = 0; i < top->evlist->core.nr_mmaps; i++)
                perf_top__mmap_read_idx(top, i);
 
        if (overwrite) {
-               perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
-               perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+               evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+               evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
        }
 }
 
@@ -1025,7 +1022,7 @@ static int perf_top__start_counters(struct perf_top *top)
                goto out_err;
        }
 
-       perf_evlist__config(evlist, opts, &callchain_param);
+       evlist__config(evlist, opts, &callchain_param);
 
        evlist__for_each_entry(evlist, counter) {
 try_again:
@@ -1153,13 +1150,13 @@ static int deliver_event(struct ordered_events *qe,
                return 0;
        }
 
-       ret = perf_evlist__parse_sample(evlist, event, &sample);
+       ret = evlist__parse_sample(evlist, event, &sample);
        if (ret) {
                pr_err("Can't parse sample, err = %d\n", ret);
                goto next_event;
        }
 
-       evsel = perf_evlist__id2evsel(session->evlist, sample.id);
+       evsel = evlist__id2evsel(session->evlist, sample.id);
        assert(evsel != NULL);
 
        if (event->header.type == PERF_RECORD_SAMPLE) {
@@ -1469,8 +1466,7 @@ int cmd_top(int argc, const char **argv)
        OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
                    "hide kernel symbols"),
        OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages",
-                    "number of mmap data pages",
-                    perf_evlist__parse_mmap_pages),
+                    "number of mmap data pages", evlist__parse_mmap_pages),
        OPT_INTEGER('r', "realtime", &top.realtime_prio,
                    "collect data with this RT SCHED_FIFO priority"),
        OPT_INTEGER('d', "delay", &top.delay_secs,
@@ -1697,7 +1693,7 @@ int cmd_top(int argc, const char **argv)
        if (target__none(target))
                target->system_wide = true;
 
-       if (perf_evlist__create_maps(top.evlist, target) < 0) {
+       if (evlist__create_maps(top.evlist, target) < 0) {
                ui__error("Couldn't create thread/CPU maps: %s\n",
                          errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
                goto out_delete_evlist;
@@ -1762,7 +1758,7 @@ int cmd_top(int argc, const char **argv)
        }
 #endif
 
-       if (perf_evlist__start_sb_thread(top.sb_evlist, target)) {
+       if (evlist__start_sb_thread(top.sb_evlist, target)) {
                pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
                opts->no_bpf_event = true;
        }
@@ -1770,7 +1766,7 @@ int cmd_top(int argc, const char **argv)
        status = __cmd_top(&top);
 
        if (!opts->no_bpf_event)
-               perf_evlist__stop_sb_thread(top.sb_evlist);
+               evlist__stop_sb_thread(top.sb_evlist);
 
 out_delete_evlist:
        evlist__delete(top.evlist);
index de80534..85b6a46 100644 (file)
@@ -3105,7 +3105,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
                return;
        }
 
-       evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
+       evsel = evlist__id2evsel(trace->evlist, sample->id);
        if (evsel == NULL) {
                fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
                return;
@@ -3666,7 +3666,7 @@ static int trace__set_filter_loop_pids(struct trace *trace)
                thread = parent;
        }
 
-       err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids);
+       err = evlist__append_tp_filter_pids(trace->evlist, nr, pids);
        if (!err && trace->filter_pids.map)
                err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
 
@@ -3680,11 +3680,11 @@ static int trace__set_filter_pids(struct trace *trace)
         * Better not use !target__has_task() here because we need to cover the
         * case where no threads were specified in the command line, but a
         * workload was, and in that case we will fill in the thread_map when
-        * we fork the workload in perf_evlist__prepare_workload.
+        * we fork the workload in evlist__prepare_workload.
         */
        if (trace->filter_pids.nr > 0) {
-               err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
-                                                        trace->filter_pids.entries);
+               err = evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
+                                                   trace->filter_pids.entries);
                if (!err && trace->filter_pids.map) {
                        err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
                                                       trace->filter_pids.entries);
@@ -3700,9 +3700,8 @@ static int __trace__deliver_event(struct trace *trace, union perf_event *event)
 {
        struct evlist *evlist = trace->evlist;
        struct perf_sample sample;
-       int err;
+       int err = evlist__parse_sample(evlist, event, &sample);
 
-       err = perf_evlist__parse_sample(evlist, event, &sample);
        if (err)
                fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
        else
@@ -3735,7 +3734,7 @@ static int trace__deliver_event(struct trace *trace, union perf_event *event)
        if (!trace->sort_events)
                return __trace__deliver_event(trace, event);
 
-       err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
+       err = evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
        if (err && err != -1)
                return err;
 
@@ -3951,7 +3950,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        if (trace->cgroup)
                evlist__set_default_cgroup(trace->evlist, trace->cgroup);
 
-       err = perf_evlist__create_maps(evlist, &trace->opts.target);
+       err = evlist__create_maps(evlist, &trace->opts.target);
        if (err < 0) {
                fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
                goto out_delete_evlist;
@@ -3963,14 +3962,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
                goto out_delete_evlist;
        }
 
-       perf_evlist__config(evlist, &trace->opts, &callchain_param);
+       evlist__config(evlist, &trace->opts, &callchain_param);
 
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
 
        if (forks) {
-               err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
-                                                   argv, false, NULL);
+               err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
                if (err < 0) {
                        fprintf(trace->output, "Couldn't run the workload!\n");
                        goto out_delete_evlist;
@@ -4028,7 +4026,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        err = trace__expand_filters(trace, &evsel);
        if (err)
                goto out_delete_evlist;
-       err = perf_evlist__apply_filters(evlist, &evsel);
+       err = evlist__apply_filters(evlist, &evsel);
        if (err < 0)
                goto out_error_apply_filters;
 
@@ -4043,7 +4041,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
                evlist__enable(evlist);
 
        if (forks)
-               perf_evlist__start_workload(evlist);
+               evlist__start_workload(evlist);
 
        if (trace->opts.initial_delay) {
                usleep(trace->opts.initial_delay * 1000);
@@ -4229,12 +4227,10 @@ static int trace__replay(struct trace *trace)
        if (err)
                goto out;
 
-       evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
-                                                    "raw_syscalls:sys_enter");
+       evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
        /* older kernels have syscalls tp versus raw_syscalls */
        if (evsel == NULL)
-               evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
-                                                            "syscalls:sys_enter");
+               evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");
 
        if (evsel &&
            (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
@@ -4243,11 +4239,9 @@ static int trace__replay(struct trace *trace)
                goto out;
        }
 
-       evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
-                                                    "raw_syscalls:sys_exit");
+       evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
        if (evsel == NULL)
-               evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
-                                                            "syscalls:sys_exit");
+               evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
        if (evsel &&
            (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
            perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
@@ -4769,8 +4763,7 @@ int cmd_trace(int argc, const char **argv)
        OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
                    "child tasks do not inherit counters"),
        OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
-                    "number of mmap data pages",
-                    perf_evlist__parse_mmap_pages),
+                    "number of mmap data pages", evlist__parse_mmap_pages),
        OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
                   "user to profile"),
        OPT_CALLBACK(0, "duration", &trace, "float",
@@ -4907,7 +4900,7 @@ int cmd_trace(int argc, const char **argv)
        if (evsel) {
                trace.syscalls.events.augmented = evsel;
 
-               evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
+               evsel = evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
                if (evsel == NULL) {
                        pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
                        goto out;
index d09ec2f..9cd074a 100644 (file)
@@ -80,6 +80,7 @@ static void library_status(void)
        STATUS(HAVE_LIBBPF_SUPPORT, bpf);
        STATUS(HAVE_AIO_SUPPORT, aio);
        STATUS(HAVE_ZSTD_SUPPORT, zstd);
+       STATUS(HAVE_LIBPFM, libpfm4);
 }
 
 int cmd_version(int argc, const char **argv)
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json
new file mode 100644 (file)
index 0000000..3b1cd70
--- /dev/null
@@ -0,0 +1,39 @@
+[
+   {
+           "BriefDescription": "ddr cycles event",
+           "EventCode": "0x00",
+           "EventName": "imx8mm_ddr.cycles",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+   },
+   {
+           "BriefDescription": "ddr read-cycles event",
+           "EventCode": "0x2a",
+           "EventName": "imx8mm_ddr.read_cycles",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+   },
+   {
+           "BriefDescription": "ddr write-cycles event",
+           "EventCode": "0x2b",
+           "EventName": "imx8mm_ddr.write_cycles",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+   },
+   {
+           "BriefDescription": "ddr read event",
+           "EventCode": "0x35",
+           "EventName": "imx8mm_ddr.read",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+   },
+   {
+           "BriefDescription": "ddr write event",
+           "EventCode": "0x38",
+           "EventName": "imx8mm_ddr.write",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+   }
+]
+
+
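A note on the new event fields, as an assumption based on how perf treats system PMU events rather than anything stated in this diff: the Unit and Compat entries tie each event to a PMU instance, with Compat compared against the PMU's sysfs identifier string. A toy sketch of that match:

        /* Toy sketch of the Unit/Compat matching that system-PMU events like
         * the imx8mm_ddr.* entries above appear to rely on: the event applies
         * when the PMU's identifier matches Compat.  Hypothetical helper, not
         * perf code. */
        #include <stdio.h>
        #include <string.h>

        static int pmu_event_applies(const char *pmu_identifier, const char *compat)
        {
                return strcmp(pmu_identifier, compat) == 0;
        }

        int main(void)
        {
                /* e.g. read from /sys/bus/event_source/devices/imx8_ddr0/identifier */
                const char *id = "i.MX8MM";

                printf("imx8mm_ddr.cycles applies: %d\n",
                       pmu_event_applies(id, "i.MX8MM"));
                return 0;
        }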
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json
new file mode 100644 (file)
index 0000000..8e553b6
--- /dev/null
@@ -0,0 +1,18 @@
+[
+   {
+           "BriefDescription": "bytes all masters read from ddr based on read-cycles event",
+           "MetricName": "imx8mm_ddr_read.all",
+           "MetricExpr": "imx8mm_ddr.read_cycles * 4 * 4",
+           "ScaleUnit": "9.765625e-4KB",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+    },
+   {
+           "BriefDescription": "bytes all masters write to ddr based on write-cycles event",
+           "MetricName": "imx8mm_ddr_write.all",
+           "MetricExpr": "imx8mm_ddr.write_cycles * 4 * 4",
+           "ScaleUnit": "9.765625e-4KB",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+    }
+]
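The ScaleUnit in the two metrics above is just 1/1024 (9.765625e-4), so a MetricExpr that yields bytes (read or write cycles times 4 beats times 4 bytes, assuming that is what the * 4 * 4 encodes) gets reported in KB. A standalone sketch of the arithmetic, with a hypothetical counter value:

        /* Minimal sketch of how the new imx8mm_ddr_read.all metric scales:
         * the MetricExpr yields bytes, and ScaleUnit 9.765625e-4KB is 1/1024,
         * converting bytes to KB.  Standalone illustration only, not code
         * from tools/perf. */
        #include <stdio.h>

        int main(void)
        {
                unsigned long long read_cycles = 1048576;       /* hypothetical counter value */
                double bytes = (double)read_cycles * 4 * 4;     /* MetricExpr: imx8mm_ddr.read_cycles * 4 * 4 */
                double kb = bytes * 9.765625e-4;                /* ScaleUnit: 1/1024 KB per byte */

                printf("%.0f bytes = %.2f KB read from DDR\n", bytes, kb);
                return 0;
        }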
diff --git a/tools/perf/pmu-events/arch/test/arch-std-events.json b/tools/perf/pmu-events/arch/test/arch-std-events.json
new file mode 100644 (file)
index 0000000..43f6f72
--- /dev/null
@@ -0,0 +1,8 @@
+[
+    {
+        "PublicDescription": "Attributable Level 3 cache access, read",
+        "EventCode": "0x40",
+        "EventName": "L3_CACHE_RD",
+        "BriefDescription": "L3 cache access, read"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/cache.json b/tools/perf/pmu-events/arch/test/test_cpu/cache.json
new file mode 100644 (file)
index 0000000..036d0ef
--- /dev/null
@@ -0,0 +1,5 @@
+[
+    {
+        "ArchStdEvent": "L3_CACHE_RD"
+    }
+]
\ No newline at end of file
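The two test files above exercise standard-event aliasing: an entry that carries only "ArchStdEvent": "L3_CACHE_RD" inherits its EventCode and descriptions from arch/test/arch-std-events.json, matched by event name. A toy model of that lookup (illustrative only, not the jevents implementation):

        /* Toy model of the ArchStdEvent resolution these test files exercise:
         * look the name up in the architecture's standard-events table and
         * inherit its fields.  Not the real jevents code. */
        #include <stdio.h>
        #include <string.h>

        struct std_event { const char *name, *code, *brief; };

        static const struct std_event arch_std[] = {
                { "L3_CACHE_RD", "0x40", "L3 cache access, read" },
        };

        static const struct std_event *arch_std_lookup(const char *name)
        {
                for (size_t i = 0; i < sizeof(arch_std) / sizeof(arch_std[0]); i++)
                        if (!strcmp(arch_std[i].name, name))
                                return &arch_std[i];
                return NULL;
        }

        int main(void)
        {
                const struct std_event *e = arch_std_lookup("L3_CACHE_RD"); /* cache.json's ArchStdEvent */

                if (e)
                        printf("%s: EventCode=%s (%s)\n", e->name, e->code, e->brief);
                return 0;
        }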
index 7204581..27ea2b0 100644 (file)
 [
     {
-        "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
-        "EventCode": "0x24",
-        "Counter": "0,1,2,3",
-        "UMask": "0x21",
-        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Demand Data Read miss L2, no rejects",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x22",
-        "EventName": "L2_RQSTS.RFO_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "RFO requests that miss L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x24",
-        "EventName": "L2_RQSTS.CODE_RD_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "L2 cache misses when fetching instructions",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400108000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Demand requests that miss L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x27",
-        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Demand requests that miss L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100040002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x38",
-        "EventName": "L2_RQSTS.PF_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0080001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "All requests that miss L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
         "Counter": "0,1,2,3",
-        "UMask": "0x3f",
-        "EventName": "L2_RQSTS.MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "All requests that miss L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand code readshave any response type.",
         "Counter": "0,1,2,3",
-        "UMask": "0xc1",
-        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Demand Data Read requests that hit L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000010004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code readshave any response type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0xc2",
-        "EventName": "L2_RQSTS.RFO_HIT",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "RFO requests that hit L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100020002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
-        "EventCode": "0x24",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0xc4",
-        "EventName": "L2_RQSTS.CODE_RD_HIT",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x24",
-        "Counter": "0,1,2,3",
-        "UMask": "0xd8",
-        "EventName": "L2_RQSTS.PF_HIT",
+        "EventName": "L2_RQSTS.PF_MISS",
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x38"
     },
     {
-        "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0xe1",
-        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Demand Data Read requests",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC01C0004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0xe2",
-        "EventName": "L2_RQSTS.ALL_RFO",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "RFO requests to L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100040001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the total number of L2 code requests.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0xe4",
-        "EventName": "L2_RQSTS.ALL_CODE_RD",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "L2 code requests",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040100001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Demand requests to L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Demand requests that miss L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0xe7",
-        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Demand requests to L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x24",
-        "Counter": "0,1,2,3",
-        "UMask": "0xf8",
-        "EventName": "L2_RQSTS.ALL_PF",
+        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+        "PublicDescription": "Demand requests that miss L2 cache.",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x27"
     },
     {
-        "PublicDescription": "All L2 requests.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0xff",
-        "EventName": "L2_RQSTS.REFERENCES",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "All L2 requests",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400100002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
-        "EventCode": "0x2E",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x41",
-        "Errata": "SKL057",
-        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Core-originated cacheable demand requests missed L3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts core-originated cacheable requests to the  L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2.  It does not include all accesses to the L3.",
-        "EventCode": "0x2E",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4f",
-        "Errata": "SKL057",
-        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040080004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
-        "EventCode": "0x48",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "L1D_PEND_MISS.PENDING",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "L1D miss outstandings duration in cycles",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
-        "EventCode": "0x48",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with L1D load Misses outstanding.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x48",
+        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "AnyThread": "1",
-        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
-        "EventCode": "0x48",
+        "BriefDescription": "L2 writebacks that access L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "L1D_PEND_MISS.FB_FULL",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF0",
+        "EventName": "L2_TRANS.L2_WB",
+        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
-        "EventCode": "0x51",
+        "BriefDescription": "L2 cache lines filling L2",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "L1D.REPLACEMENT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "L1D data line replacements",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF1",
+        "EventName": "L2_LINES_IN.ALL",
+        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1f"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00401C0002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
-        "CounterMask": "6",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400040004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000400001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080080002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080100001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
-        "EventCode": "0xB0",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001C0004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Demand Data Read requests sent to uncore",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
-        "EventCode": "0xB0",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00801C0002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Cacheable and noncachaeble code read requests",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
-        "EventCode": "0xB0",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080100004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
-        "EventCode": "0xB0",
+        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Demand and prefetch data reads",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
-        "EventCode": "0xB0",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x80",
-        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040020001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Any memory transaction that reached the SQ.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
-        "EventCode": "0xB2",
+        "BriefDescription": "All retired store instructions.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.ALL_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x82"
     },
     {
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "EventCode": "0xB7, 0xBB",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080020004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions that miss the STLB.",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
         "Counter": "0,1,2,3",
-        "UMask": "0x11",
-        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.SILENT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired store instructions that miss the STLB.",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x12",
-        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "L1_Hit_Indication": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040048000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x21",
-        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired load instructions with locked access. (Precise Event)",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000088000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x41",
-        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x42",
-        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x02001C0002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "L1_Hit_Indication": "1"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x81",
-        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "All retired load instructions. (Precise Event)",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080020001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "All retired store instructions.",
-        "EventCode": "0xD0",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
         "Counter": "0,1,2,3",
-        "UMask": "0x82",
-        "EventName": "MEM_INST_RETIRED.ALL_STORES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "All retired store instructions. (Precise Event)",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "L1_Hit_Indication": "1"
+        "UMask": "0x8"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x02001C0004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400040002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions with L3 cache hits as data sources.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
-        "SampleAfterValue": "50021",
-        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
-        "SampleAfterValue": "50021",
-        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04001C0001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions missed L3 cache as data sources.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100400001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04001C8000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD2",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
-        "SampleAfterValue": "20011",
-        "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000080001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
-        "EventCode": "0xD2",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
-        "SampleAfterValue": "20011",
-        "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200020001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
-        "EventCode": "0xD2",
+        "BriefDescription": "Core-originated cacheable demand requests missed L3",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
-        "SampleAfterValue": "20011",
-        "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL057",
+        "EventCode": "0x2E",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x41"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
-        "EventCode": "0xD2",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf8"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD4",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0080004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
-        "EventCode": "0xF0",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "L2_TRANS.L2_WB",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "L2 writebacks that access L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
-        "EventCode": "0xF1",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1f",
-        "EventName": "L2_LINES_IN.ALL",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040040002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
-        "BriefDescription": "L2 cache lines filling L2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xF2",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "L2_LINES_OUT.SILENT",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xF2",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "L2_LINES_OUT.NON_SILENT",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
-        "EventCode": "0xF2",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "L2_LINES_OUT.USELESS_PREF",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "EventCode": "0xF2",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "L2_LINES_OUT.USELESS_HWPF",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200080004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
-        "EventCode": "0xF4",
+        "BriefDescription": "RFO requests that miss L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "SQ_MISC.SPLIT_LOCK",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Number of cache line split locks sent to uncore.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x22"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0408000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000408000",
+        "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.NON_SILENT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040028000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400408000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200040002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200408000",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100408000",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080408000",
+        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.USELESS_HWPF",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040408000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC01C8000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x10001C8000",
+        "BriefDescription": "Counts all demand data writes (RFOs)have any response type.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000010002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)have any response type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x04001C8000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04001C0002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x02001C8000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00401C0004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x01001C8000",
+        "BriefDescription": "All requests that miss L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.MISS",
+        "PublicDescription": "All requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x3f"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00801C8000",
+        "BriefDescription": "L2 code requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "PublicDescription": "Counts the total number of L2 code requests.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe4"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00401C8000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0108000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000108000",
+        "BriefDescription": "RFO requests that hit L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc2"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400108000",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200108000",
+        "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100108000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080108000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000080002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040108000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000040002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0028000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0088000",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000088000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400080001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400088000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000080004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200088000",
+        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100088000",
+        "BriefDescription": "L2 cache misses when fetching instructions",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x24"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080088000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01001C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040088000",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0048000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040080002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000048000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC01C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400048000",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200048000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100048000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x02001C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080048000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100020001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040048000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100080002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0028000",
+        "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000028000",
+        "BriefDescription": "Demand requests to L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "PublicDescription": "Demand requests to L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe7"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400028000",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100048000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200028000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000020001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100028000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040020002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080028000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040028000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0040002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests have any response type.",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0000018000",
+        "AnyThread": "1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests have any response type.",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC01C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000400004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x02001C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200400004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100400004",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL057",
+        "EventCode": "0x2E",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "Counts core-originated cacheable requests to the  L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2.  It does not include all accesses to the L3.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x4f"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040400004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC01C0004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x10001C0004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x04001C0004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040080001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x02001C0004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x01001C0004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00801C0004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00401C0004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0100004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000100004",
+        "BriefDescription": "Retired load instructions that miss the STLB.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+        "PEBS": "1",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x11"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400100004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200100004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100100004",
+        "BriefDescription": "Counts demand data readshave any response type.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000010001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data readshave any response type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080100004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040100004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080080004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0080004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000080004",
+        "BriefDescription": "L1D data line replacements",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x51",
+        "EventName": "L1D.REPLACEMENT",
+        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400080004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200080004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100080004",
+        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD4",
+        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080080004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040080004",
+        "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0040004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0020001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000040004",
+        "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.USELESS_PREF",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400040004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000020002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200040004",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.PF_HIT",
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xd8"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100040004",
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x21"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080040004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040040004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0020004",
+        "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000020004",
+        "BriefDescription": "All retired load instructions.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x81"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400020004",
+        "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+        "PEBS": "1",
+        "SampleAfterValue": "20011",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200020004",
+        "BriefDescription": "Demand Data Read requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100020004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080020002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080020004",
+        "BriefDescription": "All L2 requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "PublicDescription": "All L2 requests.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xff"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040020004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0000010004",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0400002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000400002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400400002",
+        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200400002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100400002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00401C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080400002",
+        "BriefDescription": "Number of cache line split locks sent to uncore.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF4",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040400002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC01C0002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC01C0002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x10001C0002",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001C0002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x04001C0002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x02001C0002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400080002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x01001C0002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00801C0002",
+        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00401C0002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0100002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000100002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400100002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200100002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100100002",
+        "BriefDescription": "L1D miss outstandings duration in cycles",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080100002",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc1"
+    },
+    {
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040100002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04001C0004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0080002",
+        "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000080002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400080002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01001C0002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200080002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100080002",
+        "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x42"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080080002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040080002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0040002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00801C0004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000040002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0020002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400040002",
+        "BriefDescription": "Any memory transaction that reached the SQ.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+        "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x80"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200040002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100040002",
+        "BriefDescription": "Counts any other requestshave any response type.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000018000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requestshave any response type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080040002",
+        "BriefDescription": "Cacheable and noncachaeble code read requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040040002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0020002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000020002",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400020002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200020002",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc4"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100020002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080020002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040020002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs) have any response type.",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0000010002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs) have any response type.",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0400001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000400001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200020002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400400001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200400001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100400001",
+        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080400001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040400001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC01C0001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x10001C0001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x04001C0001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400080004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x02001C0001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100080004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x01001C0001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00801C0001",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00801C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00401C0001",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0100001",
+        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000100001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400100001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200100001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100100001",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080100001",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040100001",
+        "BriefDescription": "Retired load instructions with locked access.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "UMask": "0x21"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0080001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00801C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000080001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080040002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400080001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01001C0004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200080001",
+        "BriefDescription": "Demand and prefetch data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100080001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080080001",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040080001",
+        "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x41"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0040001",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100080001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000040001",
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB2",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400040001",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200040001",
+        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100040001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200080002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400020001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080040001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040040001",
+        "BriefDescription": "Retired store instructions that miss the STLB.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
         "SampleAfterValue": "100003",
+        "UMask": "0x12"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200080001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0020001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01001C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000020001",
+        "BriefDescription": "RFO requests to L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe2"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400020001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200020001",
+        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100020001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400020002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080080001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080020001",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040020001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0080002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads have any response type.",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0000010001",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads have any response type.",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00401C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     }
 ]
\ No newline at end of file
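
For context (not part of the patch): each OFFCORE_RESPONSE.* entry above pairs EventCode 0xB7/0xBB and UMask 0x1 with a request/response bit mask ("MSRValue") that the kernel writes to the MSR named by "MSRIndex" (0x1a6/0x1a7). A minimal sketch of programming one such event by hand via perf_event_open(2) follows, assuming the usual Intel core-PMU convention that the offcore response mask travels in attr.config1 (the "offcore_rsp" format field); the MSRValue is copied from the OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE entry above, and the surrounding scaffolding is illustrative, not definitive.

/* Minimal sketch: count one offcore-response event defined in the JSON above. */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = 0xb7 | (0x1ULL << 8);     /* EventCode 0xB7, UMask 0x1 */
        attr.config1 = 0x0080080001ULL;         /* MSRValue: L3_HIT_E.SNOOP_NONE */
        attr.disabled = 1;
        attr.exclude_kernel = 1;

        fd = perf_event_open(&attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... run the workload of interest here ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("demand data reads hitting L3 (E state): %llu\n",
                       (unsigned long long)count);
        close(fd);
        return 0;
}

With a perf binary built against these JSON files, the same count should also be reachable symbolically, e.g. perf stat -e OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE, which resolves the alias to the raw encoding shown above.
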
index 213dd62..834e1cd 100644 (file)
@@ -1,67 +1,67 @@
 [
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
         "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired.  Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xCA",
+        "EventName": "FP_ASSIST.ANY",
+        "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1e"
     },
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired.  Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
-        "EventCode": "0xCA",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1e",
-        "EventName": "FP_ASSIST.ANY",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Cycles with any input/output SSE or FP assist",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     }
 ]
\ No newline at end of file
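
For context (not part of the patch): the FP_ARITH_INST_RETIRED.* umasks above differ only in how many floating-point operations each retired instruction represents (1 for scalar, 2 or 4 for 128-bit packed, 4 or 8 for 256-bit packed, per the event descriptions). A minimal sketch of folding those counts into a FLOP total; the helper name and sample inputs are illustrative only.

#include <stdint.h>
#include <stdio.h>

/*
 * Weights are the per-instruction element counts stated in the event
 * descriptions above; FMA-class instructions already count twice at
 * the event level, so no extra factor is applied here.
 */
static uint64_t retired_flops(uint64_t scalar_single, uint64_t scalar_double,
                              uint64_t p128_single, uint64_t p128_double,
                              uint64_t p256_single, uint64_t p256_double)
{
        return 1 * scalar_single + 1 * scalar_double +
               4 * p128_single + 2 * p128_double +
               8 * p256_single + 4 * p256_double;
}

int main(void)
{
        /* Hypothetical counter readings, for illustration only. */
        printf("retired FLOPs: %llu\n",
               (unsigned long long)retired_flops(0, 1000000, 0, 0, 0, 250000));
        return 0;
}
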
index 7fa95a3..e84504d 100644 (file)
 [
     {
-        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "IDQ.MITE_UOPS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x80",
+        "EventName": "ICACHE_16B.IFDATA_STALL",
+        "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "IDQ.MITE_CYCLES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x14",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "IDQ.DSB_UOPS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x408006",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
         "EventName": "IDQ.DSB_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "IDQ.MS_DSB_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "3",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
         "Counter": "0,1,2,3",
-        "UMask": "0x18",
-        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xE6",
+        "EventName": "BACLEARS.ANY",
+        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x18",
-        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x11",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "IDQ.MS_MITE_UOPS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
-        "EventCode": "0x79",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x24",
-        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles MITE is delivering 4 Uops",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x401006",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
-        "EventCode": "0x79",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
         "Counter": "0,1,2,3",
-        "UMask": "0x24",
-        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_UOPS",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles MITE is delivering any Uop",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
         "Counter": "0,1,2,3",
-        "UMask": "0x30",
-        "EventName": "IDQ.MS_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
-        "UMask": "0x30",
-        "EdgeDetect": "1",
-        "EventName": "IDQ.MS_SWITCHES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
         "EventCode": "0x79",
-        "Counter": "0,1,2,3",
-        "UMask": "0x30",
-        "EventName": "IDQ.MS_UOPS",
+        "EventName": "IDQ.MS_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x30"
     },
     {
-        "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
-        "EventCode": "0x80",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "ICACHE_16B.IFDATA_STALL",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x24"
     },
     {
-        "EventCode": "0x83",
+        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x83",
         "EventName": "ICACHE_64B.IFTAG_HIT",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x83",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "ICACHE_64B.IFTAG_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_SWITCHES",
+        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x30"
     },
     {
-        "EventCode": "0x83",
+        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "ICACHE_64B.IFTAG_STALL",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x13",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions).  c. Instruction Decode Queue (IDQ) delivers four uops.",
-        "EventCode": "0x9C",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
-        "EventCode": "0x9C",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x404006",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
-        "EventCode": "0x9C",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4  x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions).  c. Instruction Decode Queue (IDQ) delivers four uops.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
-        "CounterMask": "3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
-        "EventCode": "0x9C",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
-        "CounterMask": "2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
-        "EventCode": "0x9C",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_STALL",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
     },
     {
-        "EventCode": "0x9C",
-        "Invert": "1",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xAB",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.",
-        "EventCode": "0xAB",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x18"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x11",
+        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.STLB_MISS",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x15",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x12",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.L1I_MISS",
-        "MSRIndex": "0x3F7",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.DSB_UOPS",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x13",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x420006",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x14",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x400806",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x15",
+        "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.STLB_MISS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x400106",
+        "PEBS": "2",
+        "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x400206",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x400206",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x200206",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x400406",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x400406",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
-        "MSRIndex": "0x3F7",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x24"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x400806",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
-        "MSRIndex": "0x3F7",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x401006",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
-        "MSRIndex": "0x3F7",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_UOPS",
+        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x30"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end  after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x402006",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x410006",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x404006",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x200206",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x408006",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x300206",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x410006",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x100206",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x420006",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
-        "MSRIndex": "0x3F7",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x18"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x100206",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xAB",
+        "EventName": "DSB2MITE_SWITCHES.COUNT",
+        "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x402006",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x300206",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "Invert": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_MISS",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.L1I_MISS",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x12",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     }
 ]
\ No newline at end of file
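
The JSON fields in these hunks map one-to-one onto the x86 raw event encoding that perf programs into the IA32_PERFEVTSELx MSRs: "EventCode" occupies bits 0-7, "UMask" bits 8-15, "Invert" bit 23 and "CounterMask" (cmask) bits 24-31. As a minimal sketch (not part of this patch), the program below opens IDQ.ALL_DSB_CYCLES_4_UOPS from the hunk above (EventCode 0x79, UMask 0x18, CounterMask 4) by hand via perf_event_open(2); the workload placeholder is hypothetical.

/*
 * Minimal standalone example, assuming a recent Intel core and Linux
 * perf_event_open(2).  The raw config below is built from the JSON
 * fields of IDQ.ALL_DSB_CYCLES_4_UOPS: event=0x79, umask=0x18, cmask=4.
 */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	/* cmask in bits 24-31, umask in bits 8-15, event select in bits 0-7 */
	attr.config = (4ULL << 24) | (0x18ULL << 8) | 0x79;
	attr.disabled = 1;
	attr.exclude_kernel = 1;
	/*
	 * For the OFFCORE_RESPONSE.* events in the next file (EventCode
	 * "0xB7, 0xBB"), the JSON "MSRValue" would additionally go into
	 * attr.config1, which the x86 perf driver writes to the offcore
	 * response MSRs 0x1a6/0x1a7 named by "MSRIndex".
	 */

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the workload under measurement here (placeholder) ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("IDQ.ALL_DSB_CYCLES_4_UOPS: %llu\n",
		       (unsigned long long)count);
	close(fd);
	return 0;
}

The same encoding is reachable from the command line as "perf stat -e cpu/event=0x79,umask=0x18,cmask=4/"; once these JSON files are compiled into perf through the pmu-events infrastructure, the symbolic event name resolves to the identical raw config.
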
index f197b4c..7bd3ae3 100644
 [
     {
-        "PublicDescription": "Number of times a TSX line had a cache conflict.",
-        "EventCode": "0x54",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "TX_MEM.ABORT_CONFLICT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "EventCode": "0x54",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "TX_MEM.ABORT_CAPACITY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000080004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
-        "EventCode": "0x54",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0104000001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
-        "EventCode": "0x54",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000100002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
-        "EventCode": "0x54",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0084008000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
-        "EventCode": "0x54",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x007C400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
-        "EventCode": "0x54",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x043C408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x5d",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "TX_EXEC.MISC1",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0204000004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
-        "EventCode": "0x5d",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "TX_EXEC.MISC2",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000088000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
-        "EventCode": "0x5d",
+        "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "TX_EXEC.MISC3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "RTM region detected inside HLE.",
-        "EventCode": "0x5d",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "TX_EXEC.MISC4",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00BC408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
-        "EventCode": "0x5d",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "TX_EXEC.MISC5",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x103C408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x60",
+        "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_TIMER",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "EventCode": "0x60",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x023C400001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
-        "CounterMask": "6",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0204000002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_MEM",
+        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
-        "CounterMask": "2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
         "Counter": "0,1,2,3",
-        "UMask": "0x6",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+        "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
-        "CounterMask": "6",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Demand Data Read requests who miss L3 cache.",
-        "EventCode": "0xB0",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000080002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Demand Data Read requests who miss L3 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
-        "EventCode": "0xC3",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "Errata": "SKL089",
-        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1004008000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
-        "EventCode": "0xC8",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "HLE_RETIRED.START",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution started.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times HLE commit succeeded.",
-        "EventCode": "0xC8",
-        "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "HLE_RETIRED.COMMIT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution successfully committed",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PEBS": "1",
-        "PublicDescription": "Number of times HLE abort was triggered. (PEBS)",
-        "EventCode": "0xC8",
+        "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "HLE_RETIRED.ABORTED",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0xC8",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "HLE_RETIRED.ABORTED_MEM",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xC8",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "HLE_RETIRED.ABORTED_TIMER",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FFC408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
-        "EventCode": "0xC8",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x20",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
-        "EventCode": "0xC8",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x043C400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xC8",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x80",
-        "EventName": "HLE_RETIRED.ABORTED_EVENTS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000020004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "RTM_RETIRED.START",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution started.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0044000002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times RTM commit succeeded.",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "RTM_RETIRED.COMMIT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution successfully committed",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0204008000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Number of times RTM abort was triggered. (PEBS)",
-        "EventCode": "0xC9",
+        "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "RTM_RETIRED.ABORTED",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "RTM_RETIRED.ABORTED_MEM",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x103C400001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xC9",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "RTM_RETIRED.ABORTED_TIMER",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "PublicDescription": "Number of times a TSX line had a cache conflict.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1004000001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000080001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x80",
-        "EventName": "RTM_RETIRED.ABORTED_EVENTS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x40",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "2003",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x4",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
-        "MSRIndex": "0x3F6",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0204000001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x8",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "50021",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000020002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x10",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "20011",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x20",
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC5",
+        "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x40",
+        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "2003",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC4",
+        "PublicDescription": "RTM region detected inside HLE.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x80",
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "1009",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC3",
+        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x100",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "503",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC2",
+        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x200",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "101",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FFC408000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0404000001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x203C408000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0084000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x103C408000",
+        "BriefDescription": "Number of times an RTM execution successfully committed",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "PublicDescription": "Number of times RTM commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x043C408000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x023C408000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x103C400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x013C408000",
+        "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00BC408000",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x007C408000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0104000002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC4008000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x203C400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x007C408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2004008000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0104000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1004008000",
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_TIMER",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0404008000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2004000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0204008000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1004000004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0044008000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0104008000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0044000001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0084008000",
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0044008000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0084000002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000408000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x043C400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x20001C8000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00BC400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000108000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000088000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000048000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000028000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x20001C0004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FFC400004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x103C400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x203C400004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x013C400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x103C400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0404008000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x043C400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0104008000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x023C400004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x013C400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x013C408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00BC400004",
+        "BriefDescription": "Number of times an HLE execution successfully committed",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "PublicDescription": "Number of times HLE commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x007C400004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000020001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC4000004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x203C408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2004000004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x023C400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1004000004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000040002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0404000004",
+        "BriefDescription": "Demand Data Read requests who miss L3 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "Demand Data Read requests who miss L3 cache.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0204000004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00BC400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0104000004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2004000002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0084000004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x023C400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0044000004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FFC400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000400004",
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+        "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x20001C0004",
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_MEM",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000100004",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x100",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "503",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000080004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x013C400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000040004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x203C400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000020004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x007C400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FFC400002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC4008000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x203C400002",
+        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "PEBS": "1",
+        "PublicDescription": "Number of times RTM abort was triggered.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x103C400002",
+        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "PEBS": "1",
+        "PublicDescription": "Number of times HLE abort was triggered.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x043C400002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x203C400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x023C400002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0404000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x013C400002",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x10",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "20011",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00BC400002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2004008000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x007C400002",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC4000002",
+        "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2004000002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC4000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1004000002",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x200",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "101",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0404000002",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CAPACITY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0204000002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0084000001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0104000002",
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
+        "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0084000002",
+        "BriefDescription": "Number of times an RTM execution started.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.START",
+        "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0044000002",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL089",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000400002",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x20001C0002",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x20001C0002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000100002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000080002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1004000002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000040002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000020002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x20001C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FFC400001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC4000002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x007C400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x203C400001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x103C400001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00BC400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x043C400001",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x043C400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x023C400001",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x013C400001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FFC400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00BC400001",
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+        "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x007C400001",
+        "BriefDescription": "Number of times an HLE execution started.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.START",
+        "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC4000001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FFC400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2004000001",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2004000001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC4000001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1004000001",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x80",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "1009",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0404000001",
+        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0204000001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0104000001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0044000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0084000001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x013C400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0044000001",
+        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x6"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000400001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x023C408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x20001C0001",
+        "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000100001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x20001C8000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000080001",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x4",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000040001",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x8",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "50021",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000020001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0404000002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     }
 ]
\ No newline at end of file
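
The hunks above regenerate this memory-events file: every OFFCORE_RESPONSE entry pairs EventCode 0xB7 or 0xBB with a match value for MSRs 0x1A6/0x1A7 (the MSRValue field, now listed with the normalized "0x1a6,0x1a7" MSRIndex spelling), while the MEM_TRANS_RETIRED.LOAD_LATENCY_GT_* entries carry their latency threshold in MSR 0x3F6. As a minimal sketch of how such an entry maps onto perf's raw event syntax (not part of this commit; offcore_event() is a hypothetical helper, and the sketch assumes an Intel core PMU that exposes the event, umask and offcore_rsp format parameters in sysfs):

    import json

    # Hypothetical helper: build a raw perf event string from one JSON entry.
    def offcore_event(entry):
        # OFFCORE_RESPONSE events pair EventCode 0xB7 or 0xBB with a match
        # value for MSR 0x1A6/0x1A7; perf takes that value via offcore_rsp.
        event_code = entry["EventCode"].split(",")[0].strip()
        return "cpu/event=%s,umask=%s,offcore_rsp=%s/" % (
            event_code, entry["UMask"], entry["MSRValue"])

    # One of the entries added above.
    entry = json.loads("""
    {
        "EventCode": "0xB7, 0xBB",
        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
        "MSRValue": "0x3FC4000001",
        "UMask": "0x1"
    }
    """)

    print(offcore_event(entry))
    # cpu/event=0xB7,umask=0x1,offcore_rsp=0x3FC4000001/

For the load-latency events the threshold is passed as ldlat instead, e.g. cpu/event=0xcd,umask=0x1,ldlat=128/pp for MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128, the pp modifier requesting the precise (PEBS) sampling these entries mark as "PEBS": "2".
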
index 84a316d..1a3683f 100644
@@ -1,48 +1,56 @@
 [
     {
-        "EventCode": "0x32",
+        "BriefDescription": "Number of PREFETCHW instructions executed.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "SW_PREFETCH_ACCESS.NTA",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "EventCode": "0x32",
+        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x32",
         "EventName": "SW_PREFETCH_ACCESS.T0",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0x32",
+        "BriefDescription": "Number of hardware interrupts received by the processor.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xCB",
+        "EventName": "HW_INTERRUPTS.RECEIVED",
+        "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
+        "SampleAfterValue": "203",
+        "UMask": "0x1"
     },
     {
+        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.NTA",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of PREFETCHW instructions executed.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
-        "EventCode": "0xCB",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "HW_INTERRUPTS.RECEIVED",
-        "SampleAfterValue": "203",
-        "BriefDescription": "Number of hardware interrupts received by the processor.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x09",
+        "EventName": "MEMORY_DISAMBIGUATION.HISTORY_RESET",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     }
 ]
\ No newline at end of file
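
The hunk above re-sorts the software-prefetch and hardware-interrupt entries and adds MEMORY_DISAMBIGUATION.HISTORY_RESET (event 0x09, umask 0x1). A minimal sketch of counting them by name, assuming a perf binary rebuilt from this tree (perf's generated tables expose the lowercased EventName strings; "sleep 1" is just a placeholder workload):

    import subprocess

    # Event names come from the "EventName" fields above; perf's generated
    # tables lowercase them, so they can be requested by name.
    EVENTS = [
        "sw_prefetch_access.nta",
        "sw_prefetch_access.t0",
        "sw_prefetch_access.t1_t2",
        "sw_prefetch_access.prefetchw",
        "hw_interrupts.received",
    ]

    subprocess.run(["perf", "stat", "-e", ",".join(EVENTS), "--", "sleep", "1"],
                   check=False)

perf resolves each name back to the event=/umask= encoding recorded in the JSON, so the two spellings count the same thing.
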
index 4a891fb..f46e93a 100644
 [
     {
-        "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
-        "Counter": "Fixed counter 0",
-        "UMask": "0x1",
-        "EventName": "INST_RETIRED.ANY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Instructions retired from execution.",
-        "CounterHTOff": "Fixed counter 0"
-    },
-    {
-        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
-        "Counter": "Fixed counter 1",
-        "UMask": "0x2",
-        "EventName": "CPU_CLK_UNHALTED.THREAD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Core cycles when the thread is not in halt state",
-        "CounterHTOff": "Fixed counter 1"
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+        "SampleAfterValue": "2000003"
     },
     {
-        "Counter": "Fixed counter 1",
-        "UMask": "0x2",
-        "AnyThread": "1",
-        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
-        "CounterHTOff": "Fixed counter 1"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
-        "Counter": "Fixed counter 2",
-        "UMask": "0x3",
-        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Reference cycles when the core is not in halt state.",
-        "CounterHTOff": "Fixed counter 2"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.",
-        "EventCode": "0x03",
+        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "LD_BLOCKS.STORE_FORWARD",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x14",
+        "EventName": "ARITH.DIVIDER_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
-        "EventCode": "0x03",
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "LD_BLOCKS.NO_SR",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x07",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
         "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
-        "EventCode": "0x0D",
+        "BriefDescription": "Far branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "INT_MISC.RECOVERY_CYCLES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "PEBS": "1",
+        "PublicDescription": "This event counts far branch instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x40"
     },
     {
-        "EventCode": "0x0D",
+        "BriefDescription": "Counts the number of x87 uops dispatched.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "AnyThread": "1",
-        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.X87",
+        "PublicDescription": "Counts the number of x87 uops executed.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "EventCode": "0x0D",
+        "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
         "Counter": "0,1,2,3",
-        "UMask": "0x80",
-        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x4C",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
-        "EventCode": "0x0E",
+        "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_ISSUED.ANY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
-        "EventCode": "0x0E",
-        "Invert": "1",
+        "BriefDescription": "Total execution stalls.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.",
-        "EventCode": "0x0E",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x0E",
-        "Counter": "0,1,2,3",
-        "UMask": "0x20",
         "EventName": "UOPS_ISSUED.SLOW_LEA",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "EventCode": "0x14",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "ARITH.DIVIDER_ACTIVE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "10",
+        "EventCode": "0xC2",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
-        "EventCode": "0x3C",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
         "Counter": "0,1,2,3",
-        "UMask": "0x0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
         "EventName": "CPU_CLK_UNHALTED.THREAD_P",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Thread cycles when thread is not in halt state",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+        "SampleAfterValue": "2000003"
     },
     {
-        "EventCode": "0x3C",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
         "Counter": "0,1,2,3",
-        "UMask": "0x0",
-        "AnyThread": "1",
-        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
-        "EventCode": "0x3C",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x0",
-        "EdgeDetect": "1",
-        "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x3C",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
         "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
-        "SampleAfterValue": "2503",
-        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x3C",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "AnyThread": "1",
-        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
-        "SampleAfterValue": "2503",
-        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x3C",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
-        "SampleAfterValue": "2503",
-        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "Fixed counter 1",
+        "CounterHTOff": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0x3C",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "AnyThread": "1",
-        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
-        "SampleAfterValue": "2503",
-        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x3C",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "3",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x3C",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
-        "SampleAfterValue": "2503",
-        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "Invert": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
-        "EventCode": "0x4C",
+        "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "LOAD_HIT_PRE.SW_PF",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
-        "EventCode": "0x59",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "8",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
-        "EventCode": "0x5E",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xA8",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
-        "EventCode": "0x5E",
-        "Invert": "1",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EdgeDetect": "1",
-        "EventName": "RS_EVENTS.EMPTY_END",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
-        "EventCode": "0x87",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "ILD_STALL.LCP",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 0",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 5",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 6",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
         "Counter": "0,1,2,3",
-        "UMask": "0x80",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 7",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x80"
     },
     {
-        "PublicDescription": "Counts resource-related stall cycles.",
-        "EventCode": "0xa2",
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "RESOURCE_STALLS.ANY",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Resource-related stall cycles",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
-        "EventCode": "0xA2",
-        "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "RESOURCE_STALLS.SB",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "Counter": "1",
+        "CounterHTOff": "1",
+        "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "PEBS": "2",
+        "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xA8",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+        "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Total execution stalls.",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
         "Counter": "0,1,2,3",
-        "UMask": "0x5",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
-        "CounterMask": "5",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0xA3",
+        "AnyThread": "1",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
-        "CounterMask": "8",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
         "Counter": "0,1,2,3",
-        "UMask": "0xc",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x59",
+        "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
+        "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
-        "CounterMask": "12",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
-        "CounterMask": "16",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Not taken branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x14",
-        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
-        "CounterMask": "20",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+        "PublicDescription": "This event counts not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
-        "EventCode": "0xA6",
+        "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "3",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
-        "EventCode": "0xA6",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
-        "EventCode": "0xA6",
+        "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
-        "EventCode": "0xA6",
-        "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "Counter": "Fixed counter 2",
+        "CounterHTOff": "Fixed counter 2",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x3"
     },
     {
-        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
-        "EventCode": "0xA6",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "SampleAfterValue": "400009"
     },
     {
-        "EventCode": "0xA6",
+        "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC1",
+        "EventName": "OTHER_ASSISTS.ANY",
+        "SampleAfterValue": "100003",
+        "UMask": "0x3f"
     },
     {
-        "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
-        "EventCode": "0xA8",
+        "BriefDescription": "Cycles without actually retired uops.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "LSD.UOPS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xC2",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "This event counts cycles without actually retired uops.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of Uops delivered by the LSD.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
-        "EventCode": "0xA8",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "LSD.CYCLES_ACTIVE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA8",
+        "EventName": "LSD.UOPS",
+        "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
-        "EventCode": "0xA8",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "LSD.CYCLES_4_UOPS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "25003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
-        "EventCode": "0xB1",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.THREAD",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x87",
+        "EventName": "ILD_STALL.LCP",
+        "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
-        "EventCode": "0xB1",
-        "Invert": "1",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "EdgeDetect": "1",
+        "EventCode": "0x5E",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "Invert": "1",
+        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
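The RS_EVENTS.EMPTY_END entry above combines CounterMask, Invert and EdgeDetect so that the counter increments once per end of an RS-empty period rather than once per empty cycle. As a minimal sketch (not part of the perf tree), these JSON fields map onto the raw event terms perf exposes under /sys/bus/event_source/devices/cpu/format on Intel CPUs (event, umask, cmask, inv, edge); the helper and sample dict below are illustrative only:

def to_perf_event(ev):
    # Render a pipeline.json-style entry as a 'cpu//' string usable with
    # 'perf stat -e'; only the fields this entry uses are handled.
    terms = ["event=" + ev["EventCode"], "umask=" + ev.get("UMask", "0x0")]
    if ev.get("CounterMask", "0") != "0":
        terms.append("cmask=" + ev["CounterMask"])
    if ev.get("Invert") == "1":
        terms.append("inv=1")
    if ev.get("EdgeDetect") == "1":
        terms.append("edge=1")
    return "cpu/" + ",".join(terms) + "/"

print(to_perf_event({"EventCode": "0x5E", "UMask": "0x1", "CounterMask": "1",
                     "EdgeDetect": "1", "Invert": "1"}))
# -> cpu/event=0x5E,umask=0x1,cmask=1,inv=1,edge=1/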
     {
-        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
-        "EventCode": "0xB1",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "16",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
-        "EventCode": "0xB1",
+        "BriefDescription": "Taken branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
-        "CounterMask": "2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "PublicDescription": "This event counts taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
-        "EventCode": "0xB1",
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
-        "CounterMask": "3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
-        "EventCode": "0xB1",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of uops executed from any thread.",
-        "EventCode": "0xB1",
-        "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE",
+        "BriefDescription": "Core cycles when the thread is not in halt state",
+        "Counter": "Fixed counter 1",
+        "CounterHTOff": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of uops executed on the core.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0xB1",
+        "AnyThread": "1",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xB1",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
-        "CounterMask": "2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts both direct and indirect near call instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0xB1",
+        "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xCC",
+        "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
-        "CounterMask": "3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x40"
     },
     {
-        "EventCode": "0xB1",
+        "BriefDescription": "Resource-related stall cycles",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa2",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "PublicDescription": "Counts resource-related stall cycles.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xB1",
-        "Invert": "1",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "5",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x5"
     },
     {
-        "PublicDescription": "Counts the number of x87 uops executed.",
-        "EventCode": "0xB1",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "UOPS_EXECUTED.X87",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "25003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of x87 uops dispatched.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
-        "EventCode": "0xC0",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
         "Counter": "0,1,2,3",
-        "UMask": "0x0",
-        "Errata": "SKL091, SKL044",
-        "EventName": "INST_RETIRED.ANY_P",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "400009",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "CounterMask": "20",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x14"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
-        "EventCode": "0xC0",
-        "Counter": "1",
-        "UMask": "0x1",
-        "Errata": "SKL091, SKL044",
-        "EventName": "INST_RETIRED.PREC_DIST",
+        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
-        "CounterHTOff": "1"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Number of cycles using an always true condition applied to  PEBS instructions retired event. (inst_ret< 16)",
-        "EventCode": "0xC0",
-        "Invert": "1",
+        "BriefDescription": "Number of cycles using always true condition applied to  PEBS instructions retired event.",
         "Counter": "0,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,2,3",
+        "CounterMask": "10",
         "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
         "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "Invert": "1",
+        "PEBS": "2",
+        "PublicDescription": "Number of cycles using an always true condition applied to  PEBS instructions retired event. (inst_ret< 16)",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of cycles using always true condition applied to  PEBS instructions retired event.",
-        "CounterMask": "10",
-        "CounterHTOff": "0,2,3"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xC1",
+        "BriefDescription": "Retirement slots used.",
         "Counter": "0,1,2,3",
-        "UMask": "0x3f",
-        "EventName": "OTHER_ASSISTS.ANY",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts the retirement slots used.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0xC2",
-        "Counter": "0,1,2,3",
-        "UMask": "0x2",
         "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "PublicDescription": "Counts the retirement slots used.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Retirement slots used.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "This event counts cycles without actually retired uops.",
-        "EventCode": "0xC2",
-        "Invert": "1",
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles without actually retired uops.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
-        "EventCode": "0xC2",
-        "Invert": "1",
+        "BriefDescription": "Number of macro-fused uops retired. (non precise)",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.MACRO_FUSED",
+        "PublicDescription": "Counts the number of macro-fused uops retired. (non precise)",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with less than 10 actually retired uops.",
-        "CounterMask": "10",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Number of machine clears (nukes) of any type.",
-        "EventCode": "0xC3",
+        "BriefDescription": "Increments whenever there is an update to the LBR array.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EdgeDetect": "1",
-        "EventName": "MACHINE_CLEARS.COUNT",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Number of machine clears (nukes) of any type.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xCC",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
-        "EventCode": "0xC3",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "MACHINE_CLEARS.SMC",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Self-modifying code (SMC) detected.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5E",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all (macro) branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Instructions retired from execution.",
+        "Counter": "Fixed counter 0",
+        "CounterHTOff": "Fixed counter 0",
+        "EventName": "INST_RETIRED.ANY",
+        "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x0",
-        "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "All (macro) branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.CONDITIONAL",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "Conditional branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA2",
+        "EventName": "RESOURCE_STALLS.SB",
+        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.NEAR_CALL",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Direct and indirect near call instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
+        "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
+        "SampleAfterValue": "100007"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "All (macro) branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
+        "CounterHTOff": "0,1,2,3",
         "Errata": "SKL091",
+        "EventCode": "0xC4",
         "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
         "SampleAfterValue": "400009",
-        "BriefDescription": "All (macro) branch instructions retired.",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x4"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Mispredicted macro branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Return instructions retired.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "Errata": "SKL091",
+        "EventCode": "0xC4",
         "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PEBS": "1",
+        "PublicDescription": "This event counts return instructions retired.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Return instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "Counts all not taken macro branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Not taken branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "PublicDescription": "This event counts not taken branch instructions retired.",
         "SampleAfterValue": "400009",
-        "BriefDescription": "Taken branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Conditional branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Counts the number of far branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
-        "EventCode": "0xC5",
-        "Counter": "0,1,2,3",
-        "UMask": "0x0",
-        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts conditional branch instructions retired.",
         "SampleAfterValue": "400009",
-        "BriefDescription": "All mispredicted macro branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
-        "EventCode": "0xC5",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC5",
         "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts mispredicted conditional branch instructions retired.",
         "SampleAfterValue": "400009",
-        "BriefDescription": "Mispredicted conditional branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
-        "EventCode": "0xC5",
+        "BriefDescription": "Number of uops executed on the core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "PublicDescription": "Number of uops executed from any thread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
-        "EventCode": "0xC5",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "Mispredicted macro branch instructions retired.",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "12",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0xc"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.",
-        "EventCode": "0xC5",
+        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
-        "EventCode": "0xCC",
+        "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Increments whenever there is an update to the LBR array.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x80"
     },
     {
-        "EventCode": "0xCC",
+        "BriefDescription": "All (macro) branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009"
+    },
+    {
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
-        "EventCode": "0xE6",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "BACLEARS.ANY",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     }
 ]
\ No newline at end of file
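The metrics file below builds on the events defined above. At build time perf's jevents generates C tables from these JSON files; for ad-hoc inspection they can also be read directly. A small sketch, assuming a local copy of the event file (the path is hypothetical):

import json

with open("pipeline.json") as f:  # hypothetical local copy of the file above
    events = json.load(f)

for ev in events:
    # Fixed-counter events such as INST_RETIRED.ANY carry no EventCode.
    code = ev.get("EventCode", "fixed")
    print(f"{ev['EventName']:40} event={code} umask={ev.get('UMask', '-')}")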
index 8704efe..4cd2467 100644 (file)
 [
     {
-        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
         "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
+        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
+        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
         "MetricGroup": "TopdownL1",
-        "MetricName": "Frontend_Bound",
-        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
+        "MetricName": "Frontend_Bound"
     },
     {
+        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
         "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
         "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Frontend_Bound_SMT",
-        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+        "MetricName": "Frontend_Bound_SMT"
     },
     {
-        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
         "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)",
+        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
+        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
         "MetricGroup": "TopdownL1",
-        "MetricName": "Bad_Speculation",
-        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
+        "MetricName": "Bad_Speculation"
     },
     {
+        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.",
         "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
         "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Bad_Speculation_SMT",
-        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
+        "MetricName": "Bad_Speculation_SMT"
     },
     {
+        "MetricConstraint": "NO_NMI_WATCHDOG",
+        "MetricGroup": "TopdownL1",
+        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
         "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
         "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
-        "MetricGroup": "TopdownL1",
-        "MetricName": "Backend_Bound",
-        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
+        "MetricName": "Backend_Bound"
     },
     {
+        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
+        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
         "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
         "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Backend_Bound_SMT",
-        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+        "MetricName": "Backend_Bound_SMT"
     },
     {
-        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
         "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)",
+        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved.  Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. ",
+        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
         "MetricGroup": "TopdownL1",
-        "MetricName": "Retiring",
-        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved.  Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. "
+        "MetricName": "Retiring"
     },
     {
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved.  Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU.",
         "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
         "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Retiring_SMT",
-        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved.  Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
+        "MetricName": "Retiring_SMT"
     },
     {
-        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
         "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
-        "MetricGroup": "TopDownL1",
+        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+        "MetricGroup": "Summary",
         "MetricName": "IPC"
     },
     {
-        "BriefDescription": "Uops Per Instruction",
         "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "BriefDescription": "Uops Per Instruction",
         "MetricGroup": "Pipeline;Retire",
         "MetricName": "UPI"
     },
     {
-        "BriefDescription": "Instruction per taken branch",
         "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+        "BriefDescription": "Instruction per taken branch",
         "MetricGroup": "Branches;Fetch_BW;PGO",
         "MetricName": "IpTB"
     },
     {
-        "BriefDescription": "Branch instructions per taken branch. ",
-        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
-        "MetricGroup": "Branches;PGO",
-        "MetricName": "BpTB"
-    },
-    {
-        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions",
-        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )",
-        "MetricGroup": "PGO;IcMiss",
-        "MetricName": "IFetch_Line_Utilization"
-    },
-    {
-        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
-        "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
-        "MetricGroup": "DSB;Fetch_BW",
-        "MetricName": "DSB_Coverage"
-    },
-    {
-        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
         "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
-        "MetricGroup": "Pipeline;Summary",
+        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+        "MetricGroup": "Pipeline",
         "MetricName": "CPI"
     },
     {
-        "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricGroup": "Summary",
         "MetricName": "CLKS"
     },
     {
-        "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
         "MetricExpr": "4 * cycles",
+        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
         "MetricGroup": "TopDownL1",
         "MetricName": "SLOTS"
     },
     {
-        "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
-        "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
         "MetricGroup": "TopDownL1_SMT",
         "MetricName": "SLOTS_SMT"
     },
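The *_SMT metric variants here (Retiring_SMT and SLOTS_SMT above, and the SMT metrics below) all rebuild core clocks from per-thread counters instead of using 4 * cycles directly. A minimal standalone sketch of that derivation, with hypothetical counter deltas rather than values from any real run:

#include <stdio.h>

int main(void)
{
	double thread_clks = 1.2e9;       /* CPU_CLK_UNHALTED.THREAD */
	double one_thread_active = 3.0e8; /* CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE */
	double ref_xclk = 4.0e8;          /* CPU_CLK_UNHALTED.REF_XCLK */

	/* Core clocks when any logical processor is active (CORE_CLKS). */
	double core_clks = (thread_clks / 2) * (1 + one_thread_active / ref_xclk);

	/* Four issue slots per core clock, as in SLOTS_SMT above. */
	double slots = 4 * core_clks;

	printf("CORE_CLKS=%.0f SLOTS=%.0f\n", core_clks, slots);
	return 0;
}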
     {
-        "BriefDescription": "Instructions per Load (lower number means higher occurance rate)",
-        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
-        "MetricGroup": "Instruction_Type",
-        "MetricName": "IpL"
-    },
-    {
-        "BriefDescription": "Instructions per Store (lower number means higher occurance rate)",
-        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
-        "MetricGroup": "Instruction_Type",
-        "MetricName": "IpS"
-    },
-    {
-        "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Branches;Instruction_Type",
-        "MetricName": "IpB"
-    },
-    {
-        "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
-        "MetricGroup": "Branches",
-        "MetricName": "IpCall"
-    },
-    {
-        "BriefDescription": "Total number of retired Instructions",
-        "MetricExpr": "INST_RETIRED.ANY",
-        "MetricGroup": "Summary",
-        "MetricName": "Instructions"
-    },
-    {
-        "BriefDescription": "Instructions Per Cycle (per physical core)",
         "MetricExpr": "INST_RETIRED.ANY / cycles",
-        "MetricGroup": "SMT",
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricGroup": "SMT;TopDownL1",
         "MetricName": "CoreIPC"
     },
     {
+        "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
         "BriefDescription": "Instructions Per Cycle (per physical core)",
-        "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
-        "MetricGroup": "SMT",
+        "MetricGroup": "SMT;TopDownL1",
         "MetricName": "CoreIPC_SMT"
     },
     {
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / cycles",
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / cycles",
         "MetricGroup": "FLOPS",
         "MetricName": "FLOPc"
     },
     {
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
         "MetricGroup": "FLOPS_SMT",
         "MetricName": "FLOPc_SMT"
     },
     {
+        "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )",
         "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
-        "MetricGroup": "Pipeline",
+        "MetricGroup": "Pipeline;Ports_Utilization",
         "MetricName": "ILP"
     },
     {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
         "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
+        "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
         "MetricGroup": "BrMispredicts",
         "MetricName": "Branch_Misprediction_Cost"
     },
     {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
-        "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
+        "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
         "MetricGroup": "BrMispredicts_SMT",
         "MetricName": "Branch_Misprediction_Cost_SMT"
     },
     {
-        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
         "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
         "MetricGroup": "BrMispredicts",
         "MetricName": "IpMispredict"
     },
     {
-        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
         "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
         "MetricGroup": "SMT",
         "MetricName": "CORE_CLKS"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
+        "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
+        "MetricGroup": "Instruction_Type",
+        "MetricName": "IpLoad"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
+        "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
+        "MetricGroup": "Instruction_Type",
+        "MetricName": "IpStore"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+        "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+        "MetricGroup": "Branches;Instruction_Type",
+        "MetricName": "IpBranch"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+        "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
+        "MetricGroup": "Branches",
+        "MetricName": "IpCall"
+    },
+    {
+        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+        "BriefDescription": "Branch instructions per taken branch. ",
+        "MetricGroup": "Branches;PGO",
+        "MetricName": "BpTkBranch"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )",
+        "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+        "MetricGroup": "FLOPS;FP_Arith;Instruction_Type",
+        "MetricName": "IpFLOP"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY",
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricGroup": "Summary;TopDownL1",
+        "MetricName": "Instructions"
+    },
+    {
+        "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+        "MetricGroup": "DSB;Fetch_BW",
+        "MetricName": "DSB_Coverage"
+    },
+    {
         "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
         "MetricGroup": "Memory_Bound;Memory_Lat",
         "MetricName": "Load_Miss_Real_Latency"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
         "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
         "MetricGroup": "Memory_Bound;Memory_BW",
         "MetricName": "MLP"
     },
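Load_Miss_Real_Latency and MLP divide the same occupancy counter by different denominators. A hedged sketch, again with hypothetical counter deltas:

#include <stdio.h>

int main(void)
{
	double pending = 5.0e8;        /* L1D_PEND_MISS.PENDING: in-flight misses summed per cycle */
	double pending_cycles = 1.0e8; /* L1D_PEND_MISS.PENDING_CYCLES */
	double l1_miss = 4.0e7;        /* MEM_LOAD_RETIRED.L1_MISS */
	double fb_hit = 1.0e7;         /* MEM_LOAD_RETIRED.FB_HIT */

	/* Average cycles an L1D-missing demand load stays outstanding. */
	double load_miss_real_latency = pending / (l1_miss + fb_hit);
	/* Average number of such misses in flight when at least one is. */
	double mlp = pending / pending_cycles;

	printf("latency=%.1f cycles, MLP=%.1f\n", load_miss_real_latency, mlp);
	return 0;
}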
     {
-        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricConstraint": "NO_NMI_WATCHDOG",
         "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricGroup": "TLB",
-        "MetricName": "Page_Walks_Utilization",
-        "MetricConstraint": "NO_NMI_WATCHDOG"
+        "MetricName": "Page_Walks_Utilization"
     },
     {
+        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
-        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )",
         "MetricGroup": "TLB_SMT",
         "MetricName": "Page_Walks_Utilization_SMT"
     },
     {
-        "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
         "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
+        "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
         "MetricGroup": "Memory_BW",
         "MetricName": "L1D_Cache_Fill_BW"
     },
     {
-        "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
         "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
+        "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
         "MetricGroup": "Memory_BW",
         "MetricName": "L2_Cache_Fill_BW"
     },
     {
-        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
         "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
         "MetricGroup": "Memory_BW",
         "MetricName": "L3_Cache_Fill_BW"
     },
     {
-        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
         "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
-        "MetricGroup": "Memory_BW",
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+        "MetricGroup": "Memory_BW;Offcore",
         "MetricName": "L3_Cache_Access_BW"
     },
     {
-        "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
+        "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricGroup": "Cache_Misses",
         "MetricName": "L1MPKI"
     },
     {
-        "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+        "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricGroup": "Cache_Misses",
         "MetricName": "L2MPKI"
     },
     {
-        "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses",
+        "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
+        "MetricGroup": "Cache_Misses;Offcore",
         "MetricName": "L2MPKI_All"
     },
     {
-        "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY",
+        "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricGroup": "Cache_Misses",
         "MetricName": "L2HPKI_All"
     },
     {
-        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricGroup": "Cache_Misses",
         "MetricName": "L3MPKI"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
-        "MetricGroup": "Summary",
+        "BriefDescription": "Average CPU Utilization",
+        "MetricGroup": "HPC;Summary",
         "MetricName": "CPU_Utilization"
     },
     {
+        "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 ) / duration_time",
-        "MetricGroup": "FLOPS;Summary",
+        "MetricGroup": "FLOPS;HPC",
         "MetricName": "GFLOPs"
     },
     {
-        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
         "MetricGroup": "Power",
         "MetricName": "Turbo_Utilization"
     },
     {
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )",
         "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
-        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
-        "MetricGroup": "SMT;Summary",
+        "MetricGroup": "SMT",
         "MetricName": "SMT_2T_Utilization"
     },
     {
-        "BriefDescription": "Fraction of cycles spent in Kernel mode",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
-        "MetricGroup": "Summary",
+        "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+        "MetricGroup": "OS",
         "MetricName": "Kernel_Utilization"
     },
     {
-        "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000",
-        "MetricGroup": "Memory_BW",
+        "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+        "MetricGroup": "HPC;Memory_BW;SoC",
         "MetricName": "DRAM_BW_Use"
     },
     {
+        "MetricExpr": "arb@event\\=0x80\\,umask\\=0x2@ / arb@event\\=0x80\\,umask\\=0x2\\,cmask\\=1@",
         "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
-        "MetricExpr": "arb@event\\=0x80\\,umask\\=0x2@ / arb@event\\=0x80\\,umask\\=0x2\\,thresh\\=1@",
-        "MetricGroup": "Memory_BW",
-        "MetricName": "DRAM_Parallel_Reads"
+        "MetricGroup": "Memory_BW;SoC",
+        "MetricName": "MEM_Parallel_Reads"
     },
     {
-        "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )",
         "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )",
-        "MetricGroup": "",
+        "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+        "MetricGroup": "Branches;OS",
         "MetricName": "IpFarBranch"
     },
     {
-        "BriefDescription": "C3 residency percent per core",
         "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C3 residency percent per core",
         "MetricGroup": "Power",
         "MetricName": "C3_Core_Residency"
     },
     {
-        "BriefDescription": "C6 residency percent per core",
         "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C6 residency percent per core",
         "MetricGroup": "Power",
         "MetricName": "C6_Core_Residency"
     },
     {
-        "BriefDescription": "C7 residency percent per core",
         "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C7 residency percent per core",
         "MetricGroup": "Power",
         "MetricName": "C7_Core_Residency"
     },
     {
-        "BriefDescription": "C2 residency percent per package",
         "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C2 residency percent per package",
         "MetricGroup": "Power",
         "MetricName": "C2_Pkg_Residency"
     },
     {
-        "BriefDescription": "C3 residency percent per package",
         "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C3 residency percent per package",
         "MetricGroup": "Power",
         "MetricName": "C3_Pkg_Residency"
     },
     {
-        "BriefDescription": "C6 residency percent per package",
         "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C6 residency percent per package",
         "MetricGroup": "Power",
         "MetricName": "C6_Pkg_Residency"
     },
     {
-        "BriefDescription": "C7 residency percent per package",
         "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C7 residency percent per package",
         "MetricGroup": "Power",
         "MetricName": "C7_Pkg_Residency"
     }
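The C-state residency metrics that close the list all share one shape: a free-running cstate counter delta divided by the TSC delta over the same interval, as a percentage. A standalone sketch with hypothetical deltas:

#include <stdio.h>

int main(void)
{
	double c6_residency = 7.5e8; /* cstate_core/c6-residency/ delta */
	double tsc = 3.0e9;          /* msr/tsc/ delta over the same interval */

	/* C6_Core_Residency: percent of wall time the core spent in C6. */
	printf("C6 residency = %.1f%%\n", c6_residency / tsc * 100);
	return 0;
}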
index 2bcba7d..432530d 100644
 [
     {
-        "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
-        "EventCode": "0x08",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x08",
-        "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x08",
+        "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x08",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
-        "EventCode": "0x08",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
         "Counter": "0,1,2,3",
-        "UMask": "0xe",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
-        "EventCode": "0x08",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xAE",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+        "SampleAfterValue": "100007",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
-        "EventCode": "0x08",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_ACTIVE",
+        "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
-        "EventCode": "0x08",
+        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
         "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xBD",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x49",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x49",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x49",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
-        "EventCode": "0x49",
+        "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
         "Counter": "0,1,2,3",
-        "UMask": "0xe",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
-        "EventCode": "0x49",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0xe"
     },
     {
-        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
-        "EventCode": "0x49",
+        "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
-        "EventCode": "0x49",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks (1G page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
-        "EventCode": "0x4F",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "EPT.WALK_PENDING",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0xe"
     },
     {
-        "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
-        "EventCode": "0x85",
+        "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Misses at all ITLB levels that cause page walks",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
-        "EventCode": "0x85",
+        "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.STLB_HIT",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
-        "EventCode": "0x85",
+        "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
-        "EventCode": "0x85",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
-        "EventCode": "0x85",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
         "Counter": "0,1,2,3",
-        "UMask": "0xe",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x4f",
+        "EventName": "EPT.WALK_PENDING",
+        "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
-        "EventCode": "0x85",
+        "BriefDescription": "STLB flush attempts",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "ITLB_MISSES.WALK_PENDING",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xBD",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+        "SampleAfterValue": "100007",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
-        "EventCode": "0x85",
+        "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "ITLB_MISSES.WALK_ACTIVE",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks  (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "EventCode": "0x85",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "ITLB_MISSES.STLB_HIT",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
-        "EventCode": "0xAE",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "ITLB.ITLB_FLUSH",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
-        "EventCode": "0xBD",
+        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "TLB_FLUSH.DTLB_THREAD",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0xe"
     },
     {
-        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
-        "EventCode": "0xBD",
+        "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "TLB_FLUSH.STLB_ANY",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "STLB flush attempts",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks  (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
     }
 ]
\ No newline at end of file
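Each entry in this table carries enough to program a general-purpose counter: on x86 the EventCode, UMask and CounterMask fields map onto the standard IA32_PERFEVTSELx bit layout (perf builds this for you via cpu/event=...,umask=.../; the packing here is a sketch assuming that usual layout):

#include <stdint.h>
#include <stdio.h>

/* Pack EventCode/UMask/CounterMask into a raw PERFEVTSEL-style config,
 * e.g. for DTLB_STORE_MISSES.WALK_ACTIVE above (EventCode 0x49,
 * UMask 0x10, CounterMask 1). */
static uint64_t perfevtsel(uint64_t event, uint64_t umask, uint64_t cmask)
{
	return (event & 0xff) | ((umask & 0xff) << 8) | ((cmask & 0xff) << 24);
}

int main(void)
{
	printf("config=0x%llx\n",
	       (unsigned long long)perfevtsel(0x49, 0x10, 1));
	return 0;
}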
index e47644c..e1f3f5c 100644
@@ -55,6 +55,7 @@ char *prog;
 
 struct json_event {
        char *name;
+       char *compat;
        char *event;
        char *desc;
        char *long_desc;
@@ -82,6 +83,23 @@ enum aggr_mode_class convert(const char *aggr_mode)
 
 typedef int (*func)(void *data, struct json_event *je);
 
+static LIST_HEAD(sys_event_tables);
+
+struct sys_event_table {
+       struct list_head list;
+       char *soc_id;
+};
+
+static void free_sys_event_tables(void)
+{
+       struct sys_event_table *et, *next;
+
+       list_for_each_entry_safe(et, next, &sys_event_tables, list) {
+               free(et->soc_id);
+               free(et);
+       }
+}
+
 int eprintf(int level, int var, const char *fmt, ...)
 {
 
@@ -263,6 +281,8 @@ static struct map {
        { "hisi_sccl,ddrc", "hisi_sccl,ddrc" },
        { "hisi_sccl,hha", "hisi_sccl,hha" },
        { "hisi_sccl,l3c", "hisi_sccl,l3c" },
+       /* it's not realistic to keep adding these, we need something more scalable ... */
+       { "imx8_ddr", "imx8_ddr" },
        { "L3PMC", "amd_l3" },
        { "DFPMC", "amd_df" },
        {}
@@ -360,6 +380,8 @@ static int print_events_table_entry(void *data, struct json_event *je)
        if (je->event)
                fprintf(outfp, "\t.event = \"%s\",\n", je->event);
        fprintf(outfp, "\t.desc = \"%s\",\n", je->desc);
+       if (je->compat)
+               fprintf(outfp, "\t.compat = \"%s\",\n", je->compat);
        fprintf(outfp, "\t.topic = \"%s\",\n", topic);
        if (je->long_desc && je->long_desc[0])
                fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc);
@@ -390,6 +412,7 @@ struct event_struct {
        struct list_head list;
        char *name;
        char *event;
+       char *compat;
        char *desc;
        char *long_desc;
        char *pmu;
@@ -583,6 +606,8 @@ static int json_events(const char *fn,
                                free(code);
                        } else if (json_streq(map, field, "EventName")) {
                                addfield(map, &je.name, "", "", val);
+                       } else if (json_streq(map, field, "Compat")) {
+                               addfield(map, &je.compat, "", "", val);
                        } else if (json_streq(map, field, "BriefDescription")) {
                                addfield(map, &je.desc, "", "", val);
                                fixdesc(je.desc);
@@ -683,6 +708,7 @@ free_strings:
                free(event);
                free(je.desc);
                free(je.name);
+               free(je.compat);
                free(je.long_desc);
                free(extra_desc);
                free(je.pmu);
@@ -747,6 +773,15 @@ static char *file_name_to_table_name(char *fname)
        return tblname;
 }
 
+static bool is_sys_dir(char *fname)
+{
+       size_t len = strlen(fname), len2 = strlen("/sys");
+
+       if (len2 > len)
+               return false;
+       return !strcmp(fname+len-len2, "/sys");
+}
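is_sys_dir() is a plain suffix match on the path. A standalone copy of the same logic, for illustration only (the example vendor/platform paths are hypothetical):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Duplicate of the helper above so this compiles on its own. */
static bool is_sys_dir(const char *fname)
{
	size_t len = strlen(fname), len2 = strlen("/sys");

	if (len2 > len)
		return false;
	return !strcmp(fname + len - len2, "/sys");
}

int main(void)
{
	printf("%d\n", is_sys_dir("arm64/ampere/sys")); /* 1: ends in "/sys" */
	printf("%d\n", is_sys_dir("arm64/ampere/cpu")); /* 0: different suffix */
	printf("%d\n", is_sys_dir("sys"));              /* 0: no '/' before it */
	return 0;
}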
+
 static void print_mapping_table_prefix(FILE *outfp)
 {
        fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n");
@@ -781,6 +816,33 @@ static void print_mapping_test_table(FILE *outfp)
        fprintf(outfp, "},\n");
 }
 
+static void print_system_event_mapping_table_prefix(FILE *outfp)
+{
+       fprintf(outfp, "\nstruct pmu_sys_events pmu_sys_event_tables[] = {");
+}
+
+static void print_system_event_mapping_table_suffix(FILE *outfp)
+{
+       fprintf(outfp, "\n\t{\n\t\t.table = 0\n\t},");
+       fprintf(outfp, "\n};\n");
+}
+
+static int process_system_event_tables(FILE *outfp)
+{
+       struct sys_event_table *sys_event_table;
+
+       print_system_event_mapping_table_prefix(outfp);
+
+       list_for_each_entry(sys_event_table, &sys_event_tables, list) {
+               fprintf(outfp, "\n\t{\n\t\t.table = %s,\n\t},",
+                       sys_event_table->soc_id);
+       }
+
+       print_system_event_mapping_table_suffix(outfp);
+
+       return 0;
+}
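+
+/*
+ * Illustrative output (table symbols are produced by
+ * file_name_to_table_name(); the name below is hypothetical):
+ *
+ *     struct pmu_sys_events pmu_sys_event_tables[] = {
+ *             {
+ *                     .table = pme_arm_cortex_a76_sys,
+ *             },
+ *             {
+ *                     .table = 0
+ *             },
+ *     };
+ */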
+
 static int process_mapfile(FILE *outfp, char *fpath)
 {
        int n = 16384;
@@ -886,6 +948,8 @@ static void create_empty_mapping(const char *output_file)
        fprintf(outfp, "#include \"pmu-events/pmu-events.h\"\n");
        print_mapping_table_prefix(outfp);
        print_mapping_table_suffix(outfp);
+       print_system_event_mapping_table_prefix(outfp);
+       print_system_event_mapping_table_suffix(outfp);
        fclose(outfp);
 }
 
@@ -978,15 +1042,20 @@ static int process_one_file(const char *fpath, const struct stat *sb,
        int level   = ftwbuf->level;
        int err = 0;
 
-       if (level == 2 && is_dir) {
+       if (level >= 2 && is_dir) {
+               int count = 0;
                /*
                 * For level 2 directory, bname will include parent name,
                 * like vendor/platform. So search back from platform dir
                 * to find this.
+                * Something similar applies to a level 3 directory, but there
+                * we're in a PMU category folder, like vendor/platform/cpu.
                 */
                bname = (char *) fpath + ftwbuf->base - 2;
                for (;;) {
                        if (*bname == '/')
+                               count++;
+                       if (count == level - 1)
                                break;
                        bname--;
                }
@@ -999,13 +1068,13 @@ static int process_one_file(const char *fpath, const struct stat *sb,
                 level, sb->st_size, bname, fpath);
 
        /* base dir or too deep */
-       if (level == 0 || level > 3)
+       if (level == 0 || level > 4)
                return 0;
 
 
        /* model directory, reset topic */
        if ((level == 1 && is_dir && is_leaf_dir(fpath)) ||
-           (level == 2 && is_dir)) {
+           (level >= 2 && is_dir && is_leaf_dir(fpath))) {
                if (close_table)
                        print_events_table_suffix(eventsfp);
 
@@ -1021,6 +1090,22 @@ static int process_one_file(const char *fpath, const struct stat *sb,
                        return -1;
                }
 
+               if (is_sys_dir(bname)) {
+                       struct sys_event_table *sys_event_table;
+
+                       sys_event_table = malloc(sizeof(*sys_event_table));
+                       if (!sys_event_table)
+                               return -1;
+
+                       sys_event_table->soc_id = strdup(tblname);
+                       if (!sys_event_table->soc_id) {
+                               free(sys_event_table);
+                               return -1;
+                       }
+                       list_add_tail(&sys_event_table->list,
+                                     &sys_event_tables);
+               }
+
                print_events_table_prefix(eventsfp, tblname);
                return 0;
        }
@@ -1100,12 +1185,13 @@ static int process_one_file(const char *fpath, const struct stat *sb,
  */
 int main(int argc, char *argv[])
 {
-       int rc, ret = 0;
+       int rc, ret = 0, empty_map = 0;
        int maxfds;
        char ldirname[PATH_MAX];
        const char *arch;
        const char *output_file;
        const char *start_dirname;
+       char *err_string_ext = "";
        struct stat stbuf;
 
        prog = basename(argv[0]);
@@ -1133,7 +1219,8 @@ int main(int argc, char *argv[])
        /* If architecture does not have any event lists, bail out */
        if (stat(ldirname, &stbuf) < 0) {
                pr_info("%s: Arch %s has no PMU event lists\n", prog, arch);
-               goto empty_map;
+               empty_map = 1;
+               goto err_close_eventsfp;
        }
 
        /* Include pmu-events.h first */
@@ -1150,75 +1237,70 @@ int main(int argc, char *argv[])
         */
 
        maxfds = get_maxfds();
-       mapfile = NULL;
        rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0);
-       if (rc && verbose) {
-               pr_info("%s: Error preprocessing arch standard files %s\n",
-                       prog, ldirname);
-               goto empty_map;
-       } else if (rc < 0) {
-               /* Make build fail */
-               fclose(eventsfp);
-               free_arch_std_events();
-               return 1;
-       } else if (rc) {
-               goto empty_map;
-       }
+       if (rc)
+               goto err_processing_std_arch_event_dir;
 
        rc = nftw(ldirname, process_one_file, maxfds, 0);
-       if (rc && verbose) {
-               pr_info("%s: Error walking file tree %s\n", prog, ldirname);
-               goto empty_map;
-       } else if (rc < 0) {
-               /* Make build fail */
-               fclose(eventsfp);
-               free_arch_std_events();
-               ret = 1;
-               goto out_free_mapfile;
-       } else if (rc) {
-               goto empty_map;
-       }
+       if (rc)
+               goto err_processing_dir;
 
        sprintf(ldirname, "%s/test", start_dirname);
 
+       rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0);
+       if (rc)
+               goto err_processing_std_arch_event_dir;
+
        rc = nftw(ldirname, process_one_file, maxfds, 0);
-       if (rc && verbose) {
-               pr_info("%s: Error walking file tree %s rc=%d for test\n",
-                       prog, ldirname, rc);
-               goto empty_map;
-       } else if (rc < 0) {
-               /* Make build fail */
-               free_arch_std_events();
-               ret = 1;
-               goto out_free_mapfile;
-       } else if (rc) {
-               goto empty_map;
-       }
+       if (rc)
+               goto err_processing_dir;
 
        if (close_table)
                print_events_table_suffix(eventsfp);
 
        if (!mapfile) {
                pr_info("%s: No CPU->JSON mapping?\n", prog);
-               goto empty_map;
+               empty_map = 1;
+               goto err_close_eventsfp;
        }
 
-       if (process_mapfile(eventsfp, mapfile)) {
+       rc = process_mapfile(eventsfp, mapfile);
+       if (rc) {
                pr_info("%s: Error processing mapfile %s\n", prog, mapfile);
                /* Make build fail */
-               fclose(eventsfp);
-               free_arch_std_events();
                ret = 1;
+               goto err_close_eventsfp;
        }
 
+       rc = process_system_event_tables(eventsfp);
+       fclose(eventsfp);
+       if (rc) {
+               ret = 1;
+               goto err_out;
+       }
 
-       goto out_free_mapfile;
+       free_arch_std_events();
+       free(mapfile);
+       return 0;
 
-empty_map:
+err_processing_std_arch_event_dir:
+       err_string_ext = " for std arch event";
+err_processing_dir:
+       if (verbose) {
+               pr_info("%s: Error walking file tree %s%s\n", prog, ldirname,
+                       err_string_ext);
+               empty_map = 1;
+       } else if (rc < 0) {
+               ret = 1;
+       } else {
+               empty_map = 1;
+       }
+err_close_eventsfp:
        fclose(eventsfp);
-       create_empty_mapping(output_file);
+       if (empty_map)
+               create_empty_mapping(output_file);
+err_out:
        free_arch_std_events();
-out_free_mapfile:
        free(mapfile);
        return ret;
 }
index 7da1a37..d1172f6 100644 (file)
@@ -12,6 +12,7 @@ enum aggr_mode_class {
  */
 struct pmu_event {
        const char *name;
+       const char *compat;
        const char *event;
        const char *desc;
        const char *topic;
@@ -43,10 +44,15 @@ struct pmu_events_map {
        struct pmu_event *table;
 };
 
+struct pmu_sys_events {
+       struct pmu_event *table;
+};
+
 /*
  * Global table mapping each known CPU for the architecture to its
  * table of PMU events.
  */
 extern struct pmu_events_map pmu_events_map[];
+extern struct pmu_sys_events pmu_sys_event_tables[];
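+
+/*
+ * pmu_sys_event_tables[] is terminated by a sentinel entry whose .table
+ * pointer is 0 (emitted by print_system_event_mapping_table_suffix() in
+ * jevents.c).
+ */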
 
 #endif
index eb76f65..461848c 100755 (executable)
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#! /usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0
 # -*- python -*-
 # -*- coding: utf-8 -*-
index ff87ccf..04f3db2 100755 (executable)
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#! /usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0-only
 # -*- python -*-
 # -*- coding: utf-8 -*-
index 4d15bf6..aa4dc4f 100644 (file)
@@ -62,6 +62,7 @@ perf-y += pfm.o
 perf-y += parse-metric.o
 perf-y += pe-file-parsing.o
 perf-y += expand-cgroup.o
+perf-y += perf-time-to-tsc.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
        $(call rule_mkdir)
index 15cea51..b4b9a94 100644 (file)
@@ -109,7 +109,7 @@ int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __m
                return TEST_FAIL;
        }
 
-       err = perf_evlist__create_maps(evlist, &opts.target);
+       err = evlist__create_maps(evlist, &opts.target);
        if (err < 0) {
                pr_debug("Not enough memory to create thread/cpu maps\n");
                goto out_delete_evlist;
@@ -127,7 +127,7 @@ int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __m
                goto out_delete_evlist;
        }
 
-       perf_evlist__config(evlist, &opts, NULL);
+       evlist__config(evlist, &opts, NULL);
 
        err = evlist__open(evlist);
        if (err < 0) {
index cd77e33..f57e075 100644 (file)
@@ -9,12 +9,10 @@
 #include <util/util.h>
 #include <util/bpf-loader.h>
 #include <util/evlist.h>
-#include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <api/fs/fs.h>
-#include <bpf/bpf.h>
 #include <perf/mmap.h>
 #include "tests.h"
 #include "llvm.h"
@@ -25,6 +23,8 @@
 #define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test"
 
 #ifdef HAVE_LIBBPF_SUPPORT
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
 
 static int epoll_pwait_loop(void)
 {
@@ -144,23 +144,23 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
        pid[sizeof(pid) - 1] = '\0';
        opts.target.tid = opts.target.pid = pid;
 
-       /* Instead of perf_evlist__new_default, don't add default events */
+       /* Instead of evlist__new_default, don't add default events */
        evlist = evlist__new();
        if (!evlist) {
                pr_debug("Not enough memory to create evlist\n");
                return TEST_FAIL;
        }
 
-       err = perf_evlist__create_maps(evlist, &opts.target);
+       err = evlist__create_maps(evlist, &opts.target);
        if (err < 0) {
                pr_debug("Not enough memory to create thread/cpu maps\n");
                goto out_delete_evlist;
        }
 
-       perf_evlist__splice_list_tail(evlist, &parse_state.list);
+       evlist__splice_list_tail(evlist, &parse_state.list);
        evlist->nr_groups = parse_state.nr_groups;
 
-       perf_evlist__config(evlist, &opts, NULL);
+       evlist__config(evlist, &opts, NULL);
 
        err = evlist__open(evlist);
        if (err < 0) {
index 132bdb3..7273823 100644 (file)
@@ -142,6 +142,7 @@ static struct test generic_tests[] = {
                        .skip_if_fail   = false,
                        .get_nr         = test__wp_subtest_get_nr,
                        .get_desc       = test__wp_subtest_get_desc,
+                       .skip_reason    = test__wp_subtest_skip_reason,
                },
        },
        {
@@ -350,6 +351,11 @@ static struct test generic_tests[] = {
                .func = test__expand_cgroup_events,
        },
        {
+               .desc = "Convert perf time to TSC",
+               .func = test__perf_time_to_tsc,
+               .is_supported = test__tsc_is_supported,
+       },
+       {
                .func = NULL,
        },
 };
index 035c912..7c098d4 100644 (file)
@@ -378,8 +378,8 @@ static int process_sample_event(struct machine *machine,
        struct thread *thread;
        int ret;
 
-       if (perf_evlist__parse_sample(evlist, event, &sample)) {
-               pr_debug("perf_evlist__parse_sample failed\n");
+       if (evlist__parse_sample(evlist, event, &sample)) {
+               pr_debug("evlist__parse_sample failed\n");
                return -1;
        }
 
@@ -637,7 +637,7 @@ static int do_test_code_reading(bool try_kcore)
 
                evlist = evlist__new();
                if (!evlist) {
-                       pr_debug("perf_evlist__new failed\n");
+                       pr_debug("evlist__new failed\n");
                        goto out_put;
                }
 
@@ -651,7 +651,7 @@ static int do_test_code_reading(bool try_kcore)
                        goto out_put;
                }
 
-               perf_evlist__config(evlist, &opts, NULL);
+               evlist__config(evlist, &opts, NULL);
 
                evsel = evlist__first(evlist);
 
index db68894..04ce440 100644 (file)
@@ -26,13 +26,13 @@ static int attach__enable_on_exec(struct evlist *evlist)
 
        pr_debug("attaching to spawned child, enable on exec\n");
 
-       err = perf_evlist__create_maps(evlist, &target);
+       err = evlist__create_maps(evlist, &target);
        if (err < 0) {
                pr_debug("Not enough memory to create thread/cpu maps\n");
                return err;
        }
 
-       err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
+       err = evlist__prepare_workload(evlist, &target, argv, false, NULL);
        if (err < 0) {
                pr_debug("Couldn't run the workload!\n");
                return err;
@@ -47,7 +47,7 @@ static int attach__enable_on_exec(struct evlist *evlist)
                return err;
        }
 
-       return perf_evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL;
+       return evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL;
 }
 
 static int detach__enable_on_exec(struct evlist *evlist)
index bdcf032..6562181 100644 (file)
@@ -85,11 +85,10 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
 
 int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused)
 {
-       struct evlist *evlist;
        struct evsel *evsel;
        struct event_name tmp;
+       struct evlist *evlist = evlist__new_default();
 
-       evlist = perf_evlist__new_default();
        TEST_ASSERT_VAL("failed to get evlist", evlist);
 
        evsel = evlist__first(evlist);
index 0e224a0..f9e34bd 100644 (file)
@@ -5,8 +5,7 @@
 #include "tests.h"
 #include "debug.h"
 
-static int perf_evsel__test_field(struct evsel *evsel, const char *name,
-                                 int size, bool should_be_signed)
+static int evsel__test_field(struct evsel *evsel, const char *name, int size, bool should_be_signed)
 {
        struct tep_format_field *field = evsel__field(evsel, name);
        int is_signed;
@@ -43,25 +42,25 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
                return -1;
        }
 
-       if (perf_evsel__test_field(evsel, "prev_comm", 16, false))
+       if (evsel__test_field(evsel, "prev_comm", 16, false))
                ret = -1;
 
-       if (perf_evsel__test_field(evsel, "prev_pid", 4, true))
+       if (evsel__test_field(evsel, "prev_pid", 4, true))
                ret = -1;
 
-       if (perf_evsel__test_field(evsel, "prev_prio", 4, true))
+       if (evsel__test_field(evsel, "prev_prio", 4, true))
                ret = -1;
 
-       if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true))
+       if (evsel__test_field(evsel, "prev_state", sizeof(long), true))
                ret = -1;
 
-       if (perf_evsel__test_field(evsel, "next_comm", 16, false))
+       if (evsel__test_field(evsel, "next_comm", 16, false))
                ret = -1;
 
-       if (perf_evsel__test_field(evsel, "next_pid", 4, true))
+       if (evsel__test_field(evsel, "next_pid", 4, true))
                ret = -1;
 
-       if (perf_evsel__test_field(evsel, "next_prio", 4, true))
+       if (evsel__test_field(evsel, "next_prio", 4, true))
                ret = -1;
 
        evsel__delete(evsel);
@@ -73,16 +72,16 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
                return -1;
        }
 
-       if (perf_evsel__test_field(evsel, "comm", 16, false))
+       if (evsel__test_field(evsel, "comm", 16, false))
                ret = -1;
 
-       if (perf_evsel__test_field(evsel, "pid", 4, true))
+       if (evsel__test_field(evsel, "pid", 4, true))
                ret = -1;
 
-       if (perf_evsel__test_field(evsel, "prio", 4, true))
+       if (evsel__test_field(evsel, "prio", 4, true))
                ret = -1;
 
-       if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
+       if (evsel__test_field(evsel, "target_cpu", 4, true))
                ret = -1;
 
        evsel__delete(evsel);
index d5771e4..0e46aeb 100644 (file)
@@ -26,7 +26,7 @@ static int test_expand_events(struct evlist *evlist,
        char **ev_name;
        struct evsel *evsel;
 
-       TEST_ASSERT_VAL("evlist is empty", !perf_evlist__empty(evlist));
+       TEST_ASSERT_VAL("evlist is empty", !evlist__empty(evlist));
 
        nr_events = evlist->core.nr_entries;
        ev_name = calloc(nr_events, sizeof(*ev_name));
@@ -100,10 +100,9 @@ out:       for (i = 0; i < nr_events; i++)
 static int expand_default_events(void)
 {
        int ret;
-       struct evlist *evlist;
        struct rblist metric_events;
+       struct evlist *evlist = evlist__new_default();
 
-       evlist = perf_evlist__new_default();
        TEST_ASSERT_VAL("failed to get evlist", evlist);
 
        rblist__init(&metric_events);
@@ -145,7 +144,7 @@ static int expand_libpfm_events(void)
        int ret;
        struct evlist *evlist;
        struct rblist metric_events;
-       const char event_str[] = "UNHALTED_CORE_CYCLES";
+       const char event_str[] = "CYCLES";
        struct option opt = {
                .value = &evlist,
        };
@@ -161,7 +160,7 @@ static int expand_libpfm_events(void)
                         event_str, ret);
                goto out;
        }
-       if (perf_evlist__empty(evlist)) {
+       if (evlist__empty(evlist)) {
                pr_debug("libpfm was not enabled\n");
                goto out;
        }
index 50a0c9f..e6f1b2a 100644 (file)
@@ -92,7 +92,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
        CHECK__(parse_events(evlist, "dummy:u", NULL));
        CHECK__(parse_events(evlist, "cycles:u", NULL));
 
-       perf_evlist__config(evlist, &opts, NULL);
+       evlist__config(evlist, &opts, NULL);
 
        evsel = evlist__first(evlist);
 
index ae6cda8..98da8a8 100644 (file)
@@ -2,13 +2,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <bpf/libbpf.h>
-#include <util/llvm-utils.h>
-#include "llvm.h"
 #include "tests.h"
 #include "debug.h"
 
 #ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/libbpf.h>
+#include <util/llvm-utils.h>
+#include "llvm.h"
 static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
 {
        struct bpf_object *obj;
@@ -19,14 +19,6 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
        bpf_object__close(obj);
        return TEST_OK;
 }
-#else
-static int test__bpf_parsing(void *obj_buf __maybe_unused,
-                            size_t obj_buf_sz __maybe_unused)
-{
-       pr_debug("Skip bpf parsing\n");
-       return TEST_OK;
-}
-#endif
 
 static struct {
        const char *source;
@@ -170,3 +162,19 @@ const char *test__llvm_subtest_get_desc(int subtest)
 
        return bpf_source_table[subtest].desc;
 }
+#else //HAVE_LIBBPF_SUPPORT
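+/*
+ * libbpf support is compiled out: expose zero subtests and have test__llvm()
+ * report TEST_SKIP instead of stubbing the BPF parsing step as a pass.
+ */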
+int test__llvm(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+       return TEST_SKIP;
+}
+
+int test__llvm_subtest_get_nr(void)
+{
+       return 0;
+}
+
+const char *test__llvm_subtest_get_desc(int subtest __maybe_unused)
+{
+       return NULL;
+}
+#endif // HAVE_LIBBPF_SUPPORT
index 7b0dbfc..57093ae 100644 (file)
@@ -69,7 +69,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 
        evlist = evlist__new();
        if (evlist == NULL) {
-               pr_debug("perf_evlist__new\n");
+               pr_debug("evlist__new\n");
                goto out_free_cpus;
        }
 
@@ -126,14 +126,14 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
                        goto out_delete_evlist;
                }
 
-               err = perf_evlist__parse_sample(evlist, event, &sample);
+               err = evlist__parse_sample(evlist, event, &sample);
                if (err) {
                        pr_err("Can't parse sample, err = %d\n", err);
                        goto out_delete_evlist;
                }
 
                err = -1;
-               evsel = perf_evlist__id2evsel(evlist, sample.id);
+               evsel = evlist__id2evsel(evlist, sample.id);
                if (evsel == NULL) {
                        pr_debug("event with id %" PRIu64
                                 " doesn't map to an evsel\n", sample.id);
index 1f5f5e7..5e4af2f 100644 (file)
@@ -42,7 +42,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
        char sbuf[STRERR_BUFSIZE];
 
        if (evlist == NULL) {
-               pr_debug("%s: perf_evlist__new\n", __func__);
+               pr_debug("%s: evlist__new\n", __func__);
                goto out;
        }
 
@@ -54,9 +54,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
 
        evlist__add(evlist, evsel);
 
-       err = perf_evlist__create_maps(evlist, &opts.target);
+       err = evlist__create_maps(evlist, &opts.target);
        if (err < 0) {
-               pr_debug("%s: perf_evlist__create_maps\n", __func__);
+               pr_debug("%s: evlist__create_maps\n", __func__);
                goto out_delete_evlist;
        }
 
index 611512f..a7f6661 100644 (file)
@@ -115,7 +115,7 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong config",
                        PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
        /*
-        * The period value gets configured within perf_evlist__config,
+        * The period value gets configured within evlist__config,
         * while this test executes only parse events method.
         */
        TEST_ASSERT_VAL("wrong period",
@@ -443,7 +443,7 @@ static int test__checkevent_pmu(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong config1",    1 == evsel->core.attr.config1);
        TEST_ASSERT_VAL("wrong config2",    3 == evsel->core.attr.config2);
        /*
-        * The period value gets configured within perf_evlist__config,
+        * The period value gets configured within evlist__config,
         * while this test executes only parse events method.
         */
        TEST_ASSERT_VAL("wrong period",     0 == evsel->core.attr.sample_period);
@@ -520,8 +520,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",  1 == evsel->core.attr.config);
        /*
-        * The period, time and callgraph value gets configured
-        * within perf_evlist__config,
+        * The period, time and callgraph value gets configured within evlist__config,
         * while this test executes only parse events method.
         */
        TEST_ASSERT_VAL("wrong period",     0 == evsel->core.attr.sample_period);
@@ -533,8 +532,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
        TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
        TEST_ASSERT_VAL("wrong config",  2 == evsel->core.attr.config);
        /*
-        * The period, time and callgraph value gets configured
-        * within perf_evlist__config,
+        * The period, time and callgraph value gets configured within evlist__config,
         * while this test executes only parse events method.
         */
        TEST_ASSERT_VAL("wrong period",     0 == evsel->core.attr.sample_period);
index 7c1bde0..ce7be37 100644 (file)
@@ -166,7 +166,7 @@ static int __compute_metric(const char *name, struct value *vals,
        if (err)
                goto out;
 
-       err = perf_evlist__alloc_stats(evlist, false);
+       err = evlist__alloc_stats(evlist, false);
        if (err)
                goto out;
 
@@ -183,7 +183,7 @@ out:
        /* ... cleanup. */
        metricgroup__rblist_exit(&metric_events);
        runtime_stat__exit(&st);
-       perf_evlist__free_stats(evlist);
+       evlist__free_stats(evlist);
        perf_cpu_map__put(cpus);
        evlist__delete(evlist);
        return err;
index adf3c9c..4712736 100644 (file)
@@ -27,8 +27,8 @@ static int process_event(struct evlist **pevlist, union perf_event *event)
        if (!*pevlist)
                return -1;
 
-       if (perf_evlist__parse_sample(*pevlist, event, &sample)) {
-               pr_debug("perf_evlist__parse_sample failed\n");
+       if (evlist__parse_sample(*pevlist, event, &sample)) {
+               pr_debug("evlist__parse_sample failed\n");
                return -1;
        }
 
index 67d3f5a..0df471b 100644 (file)
@@ -53,7 +53,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
        };
        cpu_set_t cpu_mask;
        size_t cpu_mask_size = sizeof(cpu_mask);
-       struct evlist *evlist = perf_evlist__new_dummy();
+       struct evlist *evlist = evlist__new_dummy();
        struct evsel *evsel;
        struct perf_sample sample;
        const char *cmd = "sleep";
@@ -71,7 +71,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
        char sbuf[STRERR_BUFSIZE];
 
        if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
-               evlist = perf_evlist__new_default();
+               evlist = evlist__new_default();
 
        if (evlist == NULL) {
                pr_debug("Not enough memory to create evlist\n");
@@ -81,10 +81,10 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
        /*
         * Create maps of threads and cpus to monitor. In this case
         * we start with all threads and cpus (-1, -1) but then in
-        * perf_evlist__prepare_workload we'll fill in the only thread
+        * evlist__prepare_workload we'll fill in the only thread
         * we're monitoring, the one forked there.
         */
-       err = perf_evlist__create_maps(evlist, &opts.target);
+       err = evlist__create_maps(evlist, &opts.target);
        if (err < 0) {
                pr_debug("Not enough memory to create thread/cpu maps\n");
                goto out_delete_evlist;
@@ -92,11 +92,11 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 
        /*
         * Prepare the workload in argv[] to run, it'll fork it, and then wait
-        * for perf_evlist__start_workload() to exec it. This is done this way
+        * for evlist__start_workload() to exec it. This is done this way
         * so that we have time to open the evlist (calling sys_perf_event_open
         * on all the fds) and then mmap them.
         */
-       err = perf_evlist__prepare_workload(evlist, &opts.target, argv, false, NULL);
+       err = evlist__prepare_workload(evlist, &opts.target, argv, false, NULL);
        if (err < 0) {
                pr_debug("Couldn't run the workload!\n");
                goto out_delete_evlist;
@@ -109,7 +109,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
        evsel__set_sample_bit(evsel, CPU);
        evsel__set_sample_bit(evsel, TID);
        evsel__set_sample_bit(evsel, TIME);
-       perf_evlist__config(evlist, &opts, NULL);
+       evlist__config(evlist, &opts, NULL);
 
        err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
        if (err < 0) {
@@ -161,7 +161,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
        /*
         * Now!
         */
-       perf_evlist__start_workload(evlist);
+       evlist__start_workload(evlist);
 
        while (1) {
                int before = total_events;
@@ -182,7 +182,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
                                if (type < PERF_RECORD_MAX)
                                        nr_events[type]++;
 
-                               err = perf_evlist__parse_sample(evlist, event, &sample);
+                               err = evlist__parse_sample(evlist, event, &sample);
                                if (err < 0) {
                                        if (verbose > 0)
                                                perf_event__fprintf(event, NULL, stderr);
similarity index 91%
rename from tools/perf/arch/x86/tests/perf-time-to-tsc.c
rename to tools/perf/tests/perf-time-to-tsc.c
index 026d32e..7cff026 100644 (file)
 #include "thread_map.h"
 #include "record.h"
 #include "tsc.h"
-#include "util/mmap.h"
-#include "tests/tests.h"
-
-#include "arch-tests.h"
+#include "mmap.h"
+#include "tests.h"
 
 #define CHECK__(x) {                           \
        while ((x) < 0) {                       \
@@ -82,7 +80,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 
        CHECK__(parse_events(evlist, "cycles:u", NULL));
 
-       perf_evlist__config(evlist, &opts, NULL);
+       evlist__config(evlist, &opts, NULL);
 
        evsel = evlist__first(evlist);
 
@@ -171,3 +169,16 @@ out_err:
        evlist__delete(evlist);
        return err;
 }
+
+bool test__tsc_is_supported(void)
+{
+       /*
+        * TSC is only supported in perf on x86_64/i386 and Arm64, so just
+        * enable the test for those archs.
+        */
+#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
+       return true;
+#else
+       return false;
+#endif
+}
index d3517a7..0ca6a5a 100644 (file)
 #include "util/parse-events.h"
 
 struct perf_pmu_test_event {
+       /* used for matching against events from generated pmu-events.c */
        struct pmu_event event;
 
+       /* used for matching against event aliases */
        /* extra events for aliases */
        const char *alias_str;
 
@@ -78,6 +80,17 @@ static struct perf_pmu_test_event test_cpu_events[] = {
                .alias_str = "umask=0,(null)=0x30d40,event=0x3a",
                .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
        },
+       {
+               .event = {
+                       .name = "l3_cache_rd",
+                       .event = "event=0x40",
+                       .desc = "L3 cache access, read",
+                       .long_desc = "Attributable Level 3 cache access, read",
+                       .topic = "cache",
+               },
+               .alias_str = "event=0x40",
+               .alias_long_desc = "Attributable Level 3 cache access, read",
+       },
        { /* sentinel */
                .event = {
                        .name = NULL,
@@ -357,6 +370,7 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count)
 }
 
 
+/* Test that aliases generated are as expected */
 static int test_aliases(void)
 {
        struct perf_pmu *pmu = NULL;
@@ -561,7 +575,7 @@ static int metric_parse_fake(const char *str)
                }
        }
 
-       if (expr__parse(&result, &ctx, str, 1))
+       if (expr__parse(&result, &ctx, str, 0))
                pr_err("expr__parse failed\n");
        else
                ret = 0;
index a0bdaf3..2393916 100644 (file)
@@ -154,6 +154,9 @@ static bool samples_same(const struct perf_sample *s1,
        if (type & PERF_SAMPLE_CGROUP)
                COMP(cgroup);
 
+       if (type & PERF_SAMPLE_DATA_PAGE_SIZE)
+               COMP(data_page_size);
+
        if (type & PERF_SAMPLE_AUX) {
                COMP(aux_sample.size);
                if (memcmp(s1->aux_sample.data, s2->aux_sample.data,
@@ -234,6 +237,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
                },
                .phys_addr      = 113,
                .cgroup         = 114,
+               .data_page_size = 115,
                .aux_sample     = {
                        .size   = sizeof(aux_data),
                        .data   = (void *)aux_data,
@@ -340,7 +344,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
         * were added.  Please actually update the test rather than just change
         * the condition below.
         */
-       if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) {
+       if (PERF_SAMPLE_MAX > PERF_SAMPLE_CODE_PAGE_SIZE << 1) {
                pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
                return -1;
        }
diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh
new file mode 100755 (executable)
index 0000000..249dfe4
--- /dev/null
@@ -0,0 +1,80 @@
+#!/bin/bash
+# perf stat metrics (shadow stat) test
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# skip if system-wide mode is forbidden
+perf stat -a true > /dev/null 2>&1 || exit 2
+
+test_global_aggr()
+{
+       local cyc
+
+       perf stat -a --no-big-num -e cycles,instructions sleep 1  2>&1 | \
+       grep -e cycles -e instructions | \
+       while read num evt hash ipc rest
+       do
+               # skip not counted events
+               if [[ $num == "<not" ]]; then
+                       continue
+               fi
+
+               # save cycles count
+               if [[ $evt == "cycles" ]]; then
+                       cyc=$num
+                       continue
+               fi
+
+               # skip if no cycles
+               if [[ -z $cyc ]]; then
+                       continue
+               fi
+
+               # use printf for rounding and a leading zero
+               local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
+               if [[ $ipc != $res ]]; then
+                       echo "IPC is different: $res != $ipc  ($num / $cyc)"
+                       exit 1
+               fi
+       done
+}
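+
+# Worked example (hypothetical numbers): with 2400000 instructions and
+# 1200000 cycles, perf prints an IPC of "2.00", and recomputing it with
+# printf "%.2f" on bc's "scale=6; 2400000 / 1200000" also gives "2.00".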
+
+test_no_aggr()
+{
+       declare -A results
+
+       perf stat -a -A --no-big-num -e cycles,instructions sleep 1  2>&1 | \
+       grep ^CPU | \
+       while read cpu num evt hash ipc rest
+       do
+               # skip not counted events
+               if [[ $num == "<not" ]]; then
+                       continue
+               fi
+
+               # save cycles count
+               if [[ $evt == "cycles" ]]; then
+                       results[$cpu]=$num
+                       continue
+               fi
+
+               # skip if no cycles
+               local cyc=${results[$cpu]}
+               if [[ -z $cyc ]]; then
+                       continue
+               fi
+
+               # use printf for rounding and a leading zero
+               local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
+               if [[ $ipc != $res ]]; then
+                       echo "IPC is different for $cpu: $res != $ipc  ($num / $cyc)"
+                       exit 1
+               fi
+       done
+}
+
+test_global_aggr
+test_no_aggr
+
+exit 0
index 11cc2af..3d31c1d 100755 (executable)
@@ -20,7 +20,7 @@ skip_if_no_perf_trace || exit 2
 file=$(mktemp /tmp/temporary_file.XXXXX)
 
 trace_open_vfs_getname() {
-       evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
+       evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
        perf trace -e $evts touch $file 2>&1 | \
        egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
 }
index 4b9b731..a49c9e2 100644 (file)
@@ -109,7 +109,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
                if (event->header.type != PERF_RECORD_SAMPLE)
                        goto next_event;
 
-               err = perf_evlist__parse_sample(evlist, event, &sample);
+               err = evlist__parse_sample(evlist, event, &sample);
                if (err < 0) {
                        pr_debug("Error during parse sample\n");
                        goto out_delete_evlist;
index db5e1f7..15a2ab7 100644 (file)
@@ -128,12 +128,12 @@ static int process_sample_event(struct evlist *evlist,
        pid_t next_tid, prev_tid;
        int cpu, err;
 
-       if (perf_evlist__parse_sample(evlist, event, &sample)) {
-               pr_debug("perf_evlist__parse_sample failed\n");
+       if (evlist__parse_sample(evlist, event, &sample)) {
+               pr_debug("evlist__parse_sample failed\n");
                return -1;
        }
 
-       evsel = perf_evlist__id2evsel(evlist, sample.id);
+       evsel = evlist__id2evsel(evlist, sample.id);
        if (evsel == switch_tracking->switch_evsel) {
                next_tid = evsel__intval(evsel, &sample, "next_pid");
                prev_tid = evsel__intval(evsel, &sample, "prev_pid");
@@ -223,8 +223,8 @@ static int add_event(struct evlist *evlist, struct list_head *events,
        node->event = event;
        list_add(&node->list, events);
 
-       if (perf_evlist__parse_sample(evlist, event, &sample)) {
-               pr_debug("perf_evlist__parse_sample failed\n");
+       if (evlist__parse_sample(evlist, event, &sample)) {
+               pr_debug("evlist__parse_sample failed\n");
                return -1;
        }
 
@@ -380,7 +380,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
        cycles_evsel = evlist__last(evlist);
 
        /* Third event */
-       if (!perf_evlist__can_select_event(evlist, sched_switch)) {
+       if (!evlist__can_select_event(evlist, sched_switch)) {
                pr_debug("No sched_switch\n");
                err = 0;
                goto out;
@@ -406,7 +406,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
                pr_debug("cycles event already at front");
                goto out_err;
        }
-       perf_evlist__to_front(evlist, cycles_evsel);
+       evlist__to_front(evlist, cycles_evsel);
        if (cycles_evsel != evlist__first(evlist)) {
                pr_debug("Failed to move cycles event to front");
                goto out_err;
@@ -424,7 +424,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
 
        tracking_evsel = evlist__last(evlist);
 
-       perf_evlist__set_tracking_event(evlist, tracking_evsel);
+       evlist__set_tracking_event(evlist, tracking_evsel);
 
        tracking_evsel->core.attr.freq = 0;
        tracking_evsel->core.attr.sample_period = 1;
@@ -432,7 +432,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
        evsel__set_sample_bit(tracking_evsel, TIME);
 
        /* Config events */
-       perf_evlist__config(evlist, &opts, NULL);
+       evlist__config(evlist, &opts, NULL);
 
        /* Check moved event is still at the front */
        if (cycles_evsel != evlist__first(evlist)) {
index adaff90..bbf94e4 100644 (file)
@@ -23,7 +23,7 @@ static void sig_handler(int sig __maybe_unused)
 }
 
 /*
- * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
+ * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
  * we asked by setting its exec_error to this handler.
  */
 static void workload_exec_failed_signal(int signo __maybe_unused,
@@ -58,16 +58,16 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 
        signal(SIGCHLD, sig_handler);
 
-       evlist = perf_evlist__new_default();
+       evlist = evlist__new_default();
        if (evlist == NULL) {
-               pr_debug("perf_evlist__new_default\n");
+               pr_debug("evlist__new_default\n");
                return -1;
        }
 
        /*
         * Create maps of threads and cpus to monitor. In this case
         * we start with all threads and cpus (-1, -1) but then in
-        * perf_evlist__prepare_workload we'll fill in the only thread
+        * evlist__prepare_workload we'll fill in the only thread
         * we're monitoring, the one forked there.
         */
        cpus = perf_cpu_map__dummy_new();
@@ -83,8 +83,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
        cpus    = NULL;
        threads = NULL;
 
-       err = perf_evlist__prepare_workload(evlist, &target, argv, false,
-                                           workload_exec_failed_signal);
+       err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal);
        if (err < 0) {
                pr_debug("Couldn't run the workload!\n");
                goto out_delete_evlist;
@@ -116,7 +115,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
                goto out_delete_evlist;
        }
 
-       perf_evlist__start_workload(evlist);
+       evlist__start_workload(evlist);
 
 retry:
        md = &evlist->mmap[0];
index c85a2c0..8e24a61 100644 (file)
@@ -66,6 +66,7 @@ int test__bp_signal_overflow(struct test *test, int subtest);
 int test__bp_accounting(struct test *test, int subtest);
 int test__wp(struct test *test, int subtest);
 const char *test__wp_subtest_get_desc(int subtest);
+const char *test__wp_subtest_skip_reason(int subtest);
 int test__wp_subtest_get_nr(void);
 int test__task_exit(struct test *test, int subtest);
 int test__mem(struct test *test, int subtest);
@@ -124,10 +125,12 @@ int test__pfm_subtest_get_nr(void);
 int test__parse_metric(struct test *test, int subtest);
 int test__pe_file_parsing(struct test *test, int subtest);
 int test__expand_cgroup_events(struct test *test, int subtest);
+int test__perf_time_to_tsc(struct test *test, int subtest);
 
 bool test__bp_signal_is_supported(void);
 bool test__bp_account_is_supported(void);
 bool test__wp_is_supported(void);
+bool test__tsc_is_supported(void);
 
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
index 22daf2b..165feed 100644 (file)
@@ -40,7 +40,7 @@ static int session_write_header(char *path)
        session = perf_session__new(&data, false, NULL);
        TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
-       session->evlist = perf_evlist__new_default();
+       session->evlist = evlist__new_default();
        TEST_ASSERT_VAL("can't get evlist", session->evlist);
 
        perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
index d262d66..9387fa7 100644 (file)
@@ -174,10 +174,12 @@ static bool wp_ro_supported(void)
 #endif
 }
 
-static void wp_ro_skip_msg(void)
+static const char *wp_ro_skip_msg(void)
 {
 #if defined (__x86_64__) || defined (__i386__)
-       pr_debug("Hardware does not support read only watchpoints.\n");
+       return "missing hardware support";
+#else
+       return NULL;
 #endif
 }
 
@@ -185,7 +187,7 @@ static struct {
        const char *desc;
        int (*target_func)(void);
        bool (*is_supported)(void);
-       void (*skip_msg)(void);
+       const char *(*skip_msg)(void);
 } wp_testcase_table[] = {
        {
                .desc = "Read Only Watchpoint",
@@ -219,16 +221,23 @@ const char *test__wp_subtest_get_desc(int i)
        return wp_testcase_table[i].desc;
 }
 
+const char *test__wp_subtest_skip_reason(int i)
+{
+       if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
+               return NULL;
+       if (!wp_testcase_table[i].skip_msg)
+               return NULL;
+       return wp_testcase_table[i].skip_msg();
+}
+
 int test__wp(struct test *test __maybe_unused, int i)
 {
        if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
                return TEST_FAIL;
 
        if (wp_testcase_table[i].is_supported &&
-           !wp_testcase_table[i].is_supported()) {
-               wp_testcase_table[i].skip_msg();
+           !wp_testcase_table[i].is_supported())
                return TEST_SKIP;
-       }
 
        return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL;
 }
index e9cb30d..385894b 100644 (file)
@@ -436,6 +436,7 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
                             int __user *usockaddr_len);
 extern int __sys_socketpair(int family, int type, int protocol,
                            int __user *usockvec);
+extern int __sys_shutdown_sock(struct socket *sock, int how);
 extern int __sys_shutdown(int fd, int how);
 
 extern struct ns_common *get_net_ns(struct ns_common *ns);
index 39eb259..7682571 100755 (executable)
@@ -28,12 +28,12 @@ egrep -q $regex ${linux_mman} && \
        egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
        sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
        xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
-([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
+([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
 (egrep $regex ${header_dir}/mman-common.h | \
        egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
        sed -r "s/$regex/\2 \1 \1 \1 \2/g"      | \
        xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
-([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' ${arch_mman}) &&
+([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.h>.*' ${arch_mman}) &&
 (egrep $regex ${header_dir}/mman.h | \
        sed -r "s/$regex/\2 \1 \1 \1 \2/g"      | \
        xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
index 28f638f..664d8d5 100755 (executable)
@@ -17,7 +17,7 @@ prefix="PROT"
 
 printf "static const char *mmap_prot[] = {\n"
 regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+%s_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' ${prefix}`
-([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
+([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
 (egrep $regex ${common_mman} | \
        egrep -vw PROT_NONE | \
        sed -r "s/$regex/\2 \1 \1 \1 \2/g"      | \
index b0e1880..3b9818e 100644 (file)
@@ -2946,14 +2946,10 @@ next:
        }
 }
 
-static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
-                                   const char *helpline,
-                                   bool left_exits,
-                                   struct hist_browser_timer *hbt,
-                                   float min_pcnt,
-                                   struct perf_env *env,
-                                   bool warn_lost_event,
-                                   struct annotation_options *annotation_opts)
+static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *helpline,
+                              bool left_exits, struct hist_browser_timer *hbt, float min_pcnt,
+                              struct perf_env *env, bool warn_lost_event,
+                              struct annotation_options *annotation_opts)
 {
        struct hists *hists = evsel__hists(evsel);
        struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts);
@@ -3268,7 +3264,7 @@ do_hotkey:                 // key came straight from options ui__popup_menu()
                        if (!is_report_browser(hbt)) {
                                struct perf_top *top = hbt->arg;
 
-                               perf_evlist__toggle_enable(top->evlist);
+                               evlist__toggle_enable(top->evlist);
                                /*
                                 * No need to refresh, resort/decay histogram
                                 * entries if we are not collecting samples:
@@ -3498,19 +3494,17 @@ static int perf_evsel_menu__run(struct evsel_menu *menu,
                                continue;
                        pos = menu->selection;
 browse_hists:
-                       perf_evlist__set_selected(evlist, pos);
+                       evlist__set_selected(evlist, pos);
                        /*
                         * Give the calling tool a chance to populate the non
                         * default evsel resorted hists tree.
                         */
                        if (hbt)
                                hbt->timer(hbt->arg);
-                       key = perf_evsel__hists_browse(pos, nr_events, help,
-                                                      true, hbt,
-                                                      menu->min_pcnt,
-                                                      menu->env,
-                                                      warn_lost_event,
-                                                      menu->annotation_opts);
+                       key = evsel__hists_browse(pos, nr_events, help, true, hbt,
+                                                 menu->min_pcnt, menu->env,
+                                                 warn_lost_event,
+                                                 menu->annotation_opts);
                        ui_browser__show_title(&menu->b, title);
                        switch (key) {
                        case K_TAB:
@@ -3565,13 +3559,9 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused,
        return false;
 }
 
-static int __perf_evlist__tui_browse_hists(struct evlist *evlist,
-                                          int nr_entries, const char *help,
-                                          struct hist_browser_timer *hbt,
-                                          float min_pcnt,
-                                          struct perf_env *env,
-                                          bool warn_lost_event,
-                                          struct annotation_options *annotation_opts)
+static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, const char *help,
+                                     struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env,
+                                     bool warn_lost_event, struct annotation_options *annotation_opts)
 {
        struct evsel *pos;
        struct evsel_menu menu = {
@@ -3603,7 +3593,7 @@ static int __perf_evlist__tui_browse_hists(struct evlist *evlist,
                                    hbt, warn_lost_event);
 }
 
-static bool perf_evlist__single_entry(struct evlist *evlist)
+static bool evlist__single_entry(struct evlist *evlist)
 {
        int nr_entries = evlist->core.nr_entries;
 
@@ -3620,23 +3610,18 @@ static bool perf_evlist__single_entry(struct evlist *evlist)
        return false;
 }
 
-int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
-                                 struct hist_browser_timer *hbt,
-                                 float min_pcnt,
-                                 struct perf_env *env,
-                                 bool warn_lost_event,
-                                 struct annotation_options *annotation_opts)
+int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt,
+                            float min_pcnt, struct perf_env *env, bool warn_lost_event,
+                            struct annotation_options *annotation_opts)
 {
        int nr_entries = evlist->core.nr_entries;
 
-       if (perf_evlist__single_entry(evlist)) {
+       if (evlist__single_entry(evlist)) {
 single_entry: {
                struct evsel *first = evlist__first(evlist);
 
-               return perf_evsel__hists_browse(first, nr_entries, help,
-                                               false, hbt, min_pcnt,
-                                               env, warn_lost_event,
-                                               annotation_opts);
+               return evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt,
+                                          env, warn_lost_event, annotation_opts);
        }
        }
 
@@ -3653,10 +3638,8 @@ single_entry: {
                        goto single_entry;
        }
 
-       return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
-                                              hbt, min_pcnt, env,
-                                              warn_lost_event,
-                                              annotation_opts);
+       return __evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env,
+                                         warn_lost_event, annotation_opts);
 }
 
 static int block_hists_browser__title(struct hist_browser *browser, char *bf,
index a956393..a2b497f 100644 (file)
@@ -57,9 +57,8 @@ struct evlist;
 struct hist_entry;
 struct hist_browser_timer;
 
-int perf_evlist__gtk_browse_hists(struct evlist *evlist, const char *help,
-                                 struct hist_browser_timer *hbt,
-                                 float min_pcnt);
+int evlist__gtk_browse_hists(struct evlist *evlist, const char *help,
+                            struct hist_browser_timer *hbt, float min_pcnt);
 int hist_entry__gtk_annotate(struct hist_entry *he,
                             struct evsel *evsel,
                             struct hist_browser_timer *hbt);
index 53ef71a..c83be2d 100644 (file)
@@ -590,10 +590,8 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists,
        gtk_container_add(GTK_CONTAINER(window), view);
 }
 
-int perf_evlist__gtk_browse_hists(struct evlist *evlist,
-                                 const char *help,
-                                 struct hist_browser_timer *hbt __maybe_unused,
-                                 float min_pcnt)
+int evlist__gtk_browse_hists(struct evlist *evlist, const char *help,
+                            struct hist_browser_timer *hbt __maybe_unused, float min_pcnt)
 {
        struct evsel *pos;
        GtkWidget *vbox;
index 6c8575e..ce8c07b 100644 (file)
 #include <inttypes.h>
 #include <libgen.h>
 #include <stdlib.h>
-#include <bpf/bpf.h>
-#include <bpf/btf.h>
-#include <bpf/libbpf.h>
-#include <linux/btf.h>
 #include "util.h" // hex_width()
 #include "ui/ui.h"
 #include "sort.h"
@@ -152,6 +148,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
 #include "arch/arm/annotate/instructions.c"
 #include "arch/arm64/annotate/instructions.c"
 #include "arch/csky/annotate/instructions.c"
+#include "arch/mips/annotate/instructions.c"
 #include "arch/x86/annotate/instructions.c"
 #include "arch/powerpc/annotate/instructions.c"
 #include "arch/s390/annotate/instructions.c"
@@ -175,6 +172,13 @@ static struct arch architectures[] = {
                .init = csky__annotate_init,
        },
        {
+               .name = "mips",
+               .init = mips__annotate_init,
+               .objdump = {
+                       .comment_char = '#',
+               },
+       },
+       {
                .name = "x86",
                .init = x86__annotate_init,
                .instructions = x86__instructions,
@@ -1676,6 +1680,10 @@ fallback:
 #define PACKAGE "perf"
 #include <bfd.h>
 #include <dis-asm.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
 
 static int symbol__disassemble_bpf(struct symbol *sym,
                                   struct annotate_args *args)
index 93e063f..90d575c 100644 (file)
@@ -12,6 +12,7 @@
 #include <string.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <linux/bitops.h>
 #include <linux/compiler.h>
 #include <linux/zalloc.h>
 
 
 #include "arm-spe-decoder.h"
 
-#ifndef BIT
-#define BIT(n)         (1UL << (n))
-#endif
-
 static u64 arm_spe_calc_ip(int index, u64 payload)
 {
-       u8 *addr = (u8 *)&payload;
-       int ns, el;
+       u64 ns, el, val;
 
        /* Instruction virtual address or Branch target address */
        if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
            index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
-               ns = addr[7] & SPE_ADDR_PKT_NS;
-               el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;
+               ns = SPE_ADDR_PKT_GET_NS(payload);
+               el = SPE_ADDR_PKT_GET_EL(payload);
+
+               /* Clean highest byte */
+               payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
 
                /* Fill highest byte for EL1 or EL2 (VHE) mode */
                if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
-                       addr[7] = 0xff;
-               /* Clean highest byte for other cases */
-               else
-                       addr[7] = 0x0;
+                       payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT;
 
        /* Data access virtual address */
        } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {
 
-               /* Fill highest byte if bits [48..55] is 0xff */
-               if (addr[6] == 0xff)
-                       addr[7] = 0xff;
-               /* Otherwise, cleanup tags */
-               else
-                       addr[7] = 0x0;
+               /* Clean tags */
+               payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+
+               /*
+                * Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.1 Address packet"
+                * defines the data virtual address payload format: the top
+                * byte (bits [63:56]) is assigned as the top-byte tag, so the
+                * address value can only be retrieved from bits [55:0].
+                *
+                * According to Documentation/arm64/memory.rst, an address
+                * whose bits [55:52] carry the kernel-space pattern needs its
+                * top byte fixed up so that the perf tool can resolve the DSO
+                * symbol for the data address correctly.
+                *
+                * For this reason, if bits [55:52] are 0xf, fill 0xff into
+                * the top byte.
+                */
+               val = SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload);
+               if ((val & 0xf0ULL) == 0xf0ULL)
+                       payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT;
 
        /* Data access physical address */
        } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
-               /* Cleanup byte 7 */
-               addr[7] = 0x0;
+               /* Clean highest byte */
+               payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
        } else {
                pr_err("unsupported address packet index: 0x%x\n", index);
        }
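
The comment in this hunk describes a two-step tag fixup for data virtual addresses. For illustration, a minimal standalone sketch of that fixup, using plain shifts in place of the GENMASK_ULL()-based helpers; the function name and sample payloads are hypothetical:

/* Standalone sketch of the tag fixup for a data virtual address:
 * kernel addresses have bits [55:52] set, so byte 7 is refilled with
 * 0xff after the top-byte tag is dropped; other addresses keep it 0.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t example_fixup_data_vaddr(uint64_t payload)
{
	payload &= (1ULL << 56) - 1;		/* drop the top-byte tag */
	if (((payload >> 48) & 0xf0) == 0xf0)	/* bits [55:52] all set? */
		payload |= 0xffULL << 56;	/* restore the kernel top byte */
	return payload;
}

int main(void)
{
	/* hypothetical tagged kernel VA: tag 0x5a replaced by 0xff */
	assert(example_fixup_data_vaddr(0x5aff800012345678ULL) ==
	       0xffff800012345678ULL);
	/* hypothetical tagged user VA: tag cleared, address untouched */
	assert(example_fixup_data_vaddr(0x5a00007f12345678ULL) ==
	       0x0000007f12345678ULL);
	return 0;
}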
@@ -182,16 +192,13 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
                        if (payload & BIT(EV_TLB_ACCESS))
                                decoder->record.type |= ARM_SPE_TLB_ACCESS;
 
-                       if ((idx == 2 || idx == 4 || idx == 8) &&
-                           (payload & BIT(EV_LLC_MISS)))
+                       if (payload & BIT(EV_LLC_MISS))
                                decoder->record.type |= ARM_SPE_LLC_MISS;
 
-                       if ((idx == 2 || idx == 4 || idx == 8) &&
-                           (payload & BIT(EV_LLC_ACCESS)))
+                       if (payload & BIT(EV_LLC_ACCESS))
                                decoder->record.type |= ARM_SPE_LLC_ACCESS;
 
-                       if ((idx == 2 || idx == 4 || idx == 8) &&
-                           (payload & BIT(EV_REMOTE_ACCESS)))
+                       if (payload & BIT(EV_REMOTE_ACCESS))
                                decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
 
                        if (payload & BIT(EV_MISPRED))
index a5111a8..24727b8 100644 (file)
 
 #include "arm-spe-pkt-decoder.h"
 
-enum arm_spe_events {
-       EV_EXCEPTION_GEN        = 0,
-       EV_RETIRED              = 1,
-       EV_L1D_ACCESS           = 2,
-       EV_L1D_REFILL           = 3,
-       EV_TLB_ACCESS           = 4,
-       EV_TLB_WALK             = 5,
-       EV_NOT_TAKEN            = 6,
-       EV_MISPRED              = 7,
-       EV_LLC_ACCESS           = 8,
-       EV_LLC_MISS             = 9,
-       EV_REMOTE_ACCESS        = 10,
-       EV_ALIGNMENT            = 11,
-       EV_PARTIAL_PREDICATE    = 17,
-       EV_EMPTY_PREDICATE      = 18,
-};
-
 enum arm_spe_sample_type {
        ARM_SPE_L1D_ACCESS      = 1 << 0,
        ARM_SPE_L1D_MISS        = 1 << 1,
index b94001b..f3ac9d4 100644 (file)
@@ -8,36 +8,11 @@
 #include <string.h>
 #include <endian.h>
 #include <byteswap.h>
+#include <linux/bitops.h>
+#include <stdarg.h>
 
 #include "arm-spe-pkt-decoder.h"
 
-#define BIT(n)         (1ULL << (n))
-
-#define NS_FLAG                BIT(63)
-#define EL_FLAG                (BIT(62) | BIT(61))
-
-#define SPE_HEADER0_PAD                        0x0
-#define SPE_HEADER0_END                        0x1
-#define SPE_HEADER0_ADDRESS            0x30 /* address packet (short) */
-#define SPE_HEADER0_ADDRESS_MASK       0x38
-#define SPE_HEADER0_COUNTER            0x18 /* counter packet (short) */
-#define SPE_HEADER0_COUNTER_MASK       0x38
-#define SPE_HEADER0_TIMESTAMP          0x71
-#define SPE_HEADER0_EVENTS             0x2
-#define SPE_HEADER0_EVENTS_MASK                0xf
-#define SPE_HEADER0_SOURCE             0x3
-#define SPE_HEADER0_SOURCE_MASK                0xf
-#define SPE_HEADER0_CONTEXT            0x24
-#define SPE_HEADER0_CONTEXT_MASK       0x3c
-#define SPE_HEADER0_OP_TYPE            0x8
-#define SPE_HEADER0_OP_TYPE_MASK       0x3c
-#define SPE_HEADER1_ALIGNMENT          0x0
-#define SPE_HEADER1_ADDRESS            0xb0 /* address packet (extended) */
-#define SPE_HEADER1_ADDRESS_MASK       0xf8
-#define SPE_HEADER1_COUNTER            0x98 /* counter packet (extended) */
-#define SPE_HEADER1_COUNTER_MASK       0xf8
-
 #if __BYTE_ORDER == __BIG_ENDIAN
 #define le16_to_cpu bswap_16
 #define le32_to_cpu bswap_32
@@ -70,27 +45,28 @@ const char *arm_spe_pkt_name(enum arm_spe_pkt_type type)
        return arm_spe_packet_name[type];
 }
 
-/* return ARM SPE payload size from its encoding,
- * which is in bits 5:4 of the byte.
- * 00 : byte
- * 01 : halfword (2)
- * 10 : word (4)
- * 11 : doubleword (8)
+/*
+ * Extracts the field "sz" from header bits and converts to bytes:
+ *   00 : byte (1)
+ *   01 : halfword (2)
+ *   10 : word (4)
+ *   11 : doubleword (8)
  */
-static int payloadlen(unsigned char byte)
+static unsigned int arm_spe_payload_len(unsigned char hdr)
 {
-       return 1 << ((byte & 0x30) >> 4);
+       return 1U << ((hdr & GENMASK_ULL(5, 4)) >> 4);
 }
 
 static int arm_spe_get_payload(const unsigned char *buf, size_t len,
+                              unsigned char ext_hdr,
                               struct arm_spe_pkt *packet)
 {
-       size_t payload_len = payloadlen(buf[0]);
+       size_t payload_len = arm_spe_payload_len(buf[ext_hdr]);
 
-       if (len < 1 + payload_len)
+       if (len < 1 + ext_hdr + payload_len)
                return ARM_SPE_NEED_MORE_BYTES;
 
-       buf++;
+       buf += 1 + ext_hdr;
 
        switch (payload_len) {
        case 1: packet->payload = *(uint8_t *)buf; break;
@@ -100,7 +76,7 @@ static int arm_spe_get_payload(const unsigned char *buf, size_t len,
        default: return ARM_SPE_BAD_PACKET;
        }
 
-       return 1 + payload_len;
+       return 1 + ext_hdr + payload_len;
 }
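
For reference, the size-field decoding above can be exercised on its own. A minimal sketch, with an illustrative helper name and the four possible encodings as test values:

/* Standalone sketch: decode the payload-size field, header bits [5:4]. */
#include <assert.h>

static unsigned int example_payload_len(unsigned char hdr)
{
	/* 00 -> 1, 01 -> 2, 10 -> 4, 11 -> 8 bytes */
	return 1U << ((hdr & 0x30) >> 4);
}

int main(void)
{
	assert(example_payload_len(0x00) == 1);	/* byte       */
	assert(example_payload_len(0x10) == 2);	/* halfword   */
	assert(example_payload_len(0x20) == 4);	/* word       */
	assert(example_payload_len(0x30) == 8);	/* doubleword */
	return 0;
}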
 
 static int arm_spe_get_pad(struct arm_spe_pkt *packet)
@@ -131,127 +107,128 @@ static int arm_spe_get_timestamp(const unsigned char *buf, size_t len,
                                 struct arm_spe_pkt *packet)
 {
        packet->type = ARM_SPE_TIMESTAMP;
-       return arm_spe_get_payload(buf, len, packet);
+       return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_events(const unsigned char *buf, size_t len,
                              struct arm_spe_pkt *packet)
 {
-       int ret = arm_spe_get_payload(buf, len, packet);
-
        packet->type = ARM_SPE_EVENTS;
 
        /* we use index to identify Events with fewer comparisons
         * in arm_spe_pkt_desc(): E.g., the LLC-ACCESS,
-        * LLC-REFILL, and REMOTE-ACCESS events are identified iff
+        * LLC-REFILL, and REMOTE-ACCESS events are identified if
         * index > 1.
         */
-       packet->index = ret - 1;
+       packet->index = arm_spe_payload_len(buf[0]);
 
-       return ret;
+       return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
                                   struct arm_spe_pkt *packet)
 {
        packet->type = ARM_SPE_DATA_SOURCE;
-       return arm_spe_get_payload(buf, len, packet);
+       return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_context(const unsigned char *buf, size_t len,
                               struct arm_spe_pkt *packet)
 {
        packet->type = ARM_SPE_CONTEXT;
-       packet->index = buf[0] & 0x3;
-
-       return arm_spe_get_payload(buf, len, packet);
+       packet->index = SPE_CTX_PKT_HDR_INDEX(buf[0]);
+       return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
                               struct arm_spe_pkt *packet)
 {
        packet->type = ARM_SPE_OP_TYPE;
-       packet->index = buf[0] & 0x3;
-       return arm_spe_get_payload(buf, len, packet);
+       packet->index = SPE_OP_PKT_HDR_CLASS(buf[0]);
+       return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_counter(const unsigned char *buf, size_t len,
                               const unsigned char ext_hdr, struct arm_spe_pkt *packet)
 {
-       if (len < 2)
-               return ARM_SPE_NEED_MORE_BYTES;
-
        packet->type = ARM_SPE_COUNTER;
+
        if (ext_hdr)
-               packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
+               packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]);
        else
-               packet->index = buf[0] & 0x7;
-
-       packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
+               packet->index = SPE_HDR_SHORT_INDEX(buf[0]);
 
-       return 1 + ext_hdr + 2;
+       return arm_spe_get_payload(buf, len, ext_hdr, packet);
 }
 
 static int arm_spe_get_addr(const unsigned char *buf, size_t len,
                            const unsigned char ext_hdr, struct arm_spe_pkt *packet)
 {
-       if (len < 8)
-               return ARM_SPE_NEED_MORE_BYTES;
-
        packet->type = ARM_SPE_ADDRESS;
+
        if (ext_hdr)
-               packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
+               packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]);
        else
-               packet->index = buf[0] & 0x7;
-
-       memcpy_le64(&packet->payload, buf + 1, 8);
+               packet->index = SPE_HDR_SHORT_INDEX(buf[0]);
 
-       return 1 + ext_hdr + 8;
+       return arm_spe_get_payload(buf, len, ext_hdr, packet);
 }
 
 static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
                                 struct arm_spe_pkt *packet)
 {
-       unsigned int byte;
+       unsigned int hdr;
+       unsigned char ext_hdr = 0;
 
        memset(packet, 0, sizeof(struct arm_spe_pkt));
 
        if (!len)
                return ARM_SPE_NEED_MORE_BYTES;
 
-       byte = buf[0];
-       if (byte == SPE_HEADER0_PAD)
+       hdr = buf[0];
+
+       if (hdr == SPE_HEADER0_PAD)
                return arm_spe_get_pad(packet);
-       else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */
+
+       if (hdr == SPE_HEADER0_END) /* no timestamp at end of record */
                return arm_spe_get_end(packet);
-       else if (byte & 0xc0 /* 0y11xxxxxx */) {
-               if (byte & 0x80) {
-                       if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS)
-                               return arm_spe_get_addr(buf, len, 0, packet);
-                       if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER)
-                               return arm_spe_get_counter(buf, len, 0, packet);
-               } else
-                       if (byte == SPE_HEADER0_TIMESTAMP)
-                               return arm_spe_get_timestamp(buf, len, packet);
-                       else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS)
-                               return arm_spe_get_events(buf, len, packet);
-                       else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE)
-                               return arm_spe_get_data_source(buf, len, packet);
-                       else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT)
-                               return arm_spe_get_context(buf, len, packet);
-                       else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE)
-                               return arm_spe_get_op_type(buf, len, packet);
-       } else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) {
-               /* 16-bit header */
-               byte = buf[1];
-               if (byte == SPE_HEADER1_ALIGNMENT)
+
+       if (hdr == SPE_HEADER0_TIMESTAMP)
+               return arm_spe_get_timestamp(buf, len, packet);
+
+       if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_EVENTS)
+               return arm_spe_get_events(buf, len, packet);
+
+       if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_SOURCE)
+               return arm_spe_get_data_source(buf, len, packet);
+
+       if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_CONTEXT)
+               return arm_spe_get_context(buf, len, packet);
+
+       if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_OP_TYPE)
+               return arm_spe_get_op_type(buf, len, packet);
+
+       if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_EXTENDED) {
+               /* 16-bit extended format header */
+               ext_hdr = 1;
+
+               hdr = buf[1];
+               if (hdr == SPE_HEADER1_ALIGNMENT)
                        return arm_spe_get_alignment(buf, len, packet);
-               else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS)
-                       return arm_spe_get_addr(buf, len, 1, packet);
-               else if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER)
-                       return arm_spe_get_counter(buf, len, 1, packet);
        }
 
+       /*
+        * At this point 'hdr' holds either the short format header's byte 0
+        * or the extended format header's byte 1. Both use the same encoding
+        * for address packets and counter packets, so the two formats need
+        * not be distinguished and can be handled in one place.
+        */
+       if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_ADDRESS)
+               return arm_spe_get_addr(buf, len, ext_hdr, packet);
+
+       if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_COUNTER)
+               return arm_spe_get_counter(buf, len, ext_hdr, packet);
+
        return ARM_SPE_BAD_PACKET;
 }
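
The shared encoding that the preceding comment describes can be sanity-checked in isolation. A standalone sketch with hypothetical header bytes; the mask and match values mirror the new SPE_HEADER0_* definitions further below:

/* Standalone sketch: a single bits [7:3] test classifies address
 * (0xb0) and counter (0x98) packets in both short and extended form.
 */
#include <assert.h>

int main(void)
{
	unsigned char short_hdr = 0xb2;	/* hypothetical: address packet, index 2 */
	unsigned char ext_hdr1 = 0x9a;	/* hypothetical: counter packet */

	assert((short_hdr & 0xf8) == 0xb0);	/* SPE_HEADER0_ADDRESS */
	assert((ext_hdr1 & 0xf8) == 0x98);	/* SPE_HEADER0_COUNTER */
	return 0;
}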
 
@@ -271,192 +248,286 @@ int arm_spe_get_packet(const unsigned char *buf, size_t len,
        return ret;
 }
 
+static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen,
+                                 const char *fmt, ...)
+{
+       va_list ap;
+       int ret;
+
+       /* Bail out if any error occurred */
+       if (err && *err)
+               return *err;
+
+       va_start(ap, fmt);
+       ret = vsnprintf(*buf_p, *blen, fmt, ap);
+       va_end(ap);
+
+       if (ret < 0) {
+               if (err && !*err)
+                       *err = ret;
+
+       /*
+        * A return value of *blen or more means that the output was
+        * truncated to fit the buffer.
+        */
+       } else if ((size_t)ret >= *blen) {
+               (*buf_p)[*blen - 1] = '\0';
+
+               /*
+                * Set *err to 'ret' so that subsequent attempts to fill
+                * this buffer sequentially bail out instead of overflowing.
+                */
+               if (err && !*err)
+                       *err = ret;
+       } else {
+               *buf_p += ret;
+               *blen -= ret;
+       }
+
+       return ret;
+}
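
arm_spe_pkt_out_string() implements a sticky-error string builder: each successful call advances the write pointer and shrinks the remaining length, and the first failure turns every later call into a no-op, so a long run of calls needs only one error check at the end. A simplified standalone version of the same pattern (the helper is renamed out_string to mark it as illustrative):

/* Simplified sticky-error string builder, modeled on the code above. */
#include <stdarg.h>
#include <stdio.h>

static int out_string(int *err, char **buf_p, size_t *blen, const char *fmt, ...)
{
	va_list ap;
	int ret;

	if (err && *err)	/* sticky error: later calls are no-ops */
		return *err;

	va_start(ap, fmt);
	ret = vsnprintf(*buf_p, *blen, fmt, ap);
	va_end(ap);

	if (ret < 0 || (size_t)ret >= *blen) {
		if (err && !*err)
			*err = ret;	/* negative, or would-be length on truncation */
	} else {
		*buf_p += ret;	/* advance the write position */
		*blen -= ret;	/* shrink the remaining space */
	}
	return ret;
}

int main(void)
{
	char desc[64], *p = desc;
	size_t left = sizeof(desc);
	int err = 0;

	out_string(&err, &p, &left, "EV");
	out_string(&err, &p, &left, " RETIRED");
	out_string(&err, &p, &left, " L1D-ACCESS");
	printf("%s (err=%d)\n", desc, err);	/* EV RETIRED L1D-ACCESS (err=0) */
	return 0;
}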
+
+static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
+                                 char *buf, size_t buf_len)
+{
+       u64 payload = packet->payload;
+       int err = 0;
+
+       arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV");
+
+       if (payload & BIT(EV_EXCEPTION_GEN))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN");
+       if (payload & BIT(EV_RETIRED))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED");
+       if (payload & BIT(EV_L1D_ACCESS))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS");
+       if (payload & BIT(EV_L1D_REFILL))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL");
+       if (payload & BIT(EV_TLB_ACCESS))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS");
+       if (payload & BIT(EV_TLB_WALK))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL");
+       if (payload & BIT(EV_NOT_TAKEN))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN");
+       if (payload & BIT(EV_MISPRED))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED");
+       if (payload & BIT(EV_LLC_ACCESS))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS");
+       if (payload & BIT(EV_LLC_MISS))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL");
+       if (payload & BIT(EV_REMOTE_ACCESS))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
+       if (payload & BIT(EV_ALIGNMENT))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT");
+       if (payload & BIT(EV_PARTIAL_PREDICATE))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED");
+       if (payload & BIT(EV_EMPTY_PREDICATE))
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED");
+
+       return err;
+}
+
+static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
+                                   char *buf, size_t buf_len)
+{
+       u64 payload = packet->payload;
+       int err = 0;
+
+       switch (packet->index) {
+       case SPE_OP_PKT_HDR_CLASS_OTHER:
+               if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) {
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER");
+
+                       /* SVE effective vector length */
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
+                                              SPE_OP_PKG_SVE_EVL(payload));
+
+                       if (payload & SPE_OP_PKT_SVE_FP)
+                               arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
+                       if (payload & SPE_OP_PKT_SVE_PRED)
+                               arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
+               } else {
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s",
+                                              payload & SPE_OP_PKT_COND ?
+                                              "COND-SELECT" : "INSN-OTHER");
+               }
+               break;
+       case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC:
+               arm_spe_pkt_out_string(&err, &buf, &buf_len,
+                                      payload & 0x1 ? "ST" : "LD");
+
+               if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) {
+                       if (payload & SPE_OP_PKT_AT)
+                               arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT");
+                       if (payload & SPE_OP_PKT_EXCL)
+                               arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL");
+                       if (payload & SPE_OP_PKT_AR)
+                               arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
+               }
+
+               switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) {
+               case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP:
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
+                       break;
+               case SPE_OP_PKT_LDST_SUBCLASS_GP_REG:
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG");
+                       break;
+               case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG:
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG");
+                       break;
+               case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG:
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG");
+                       break;
+               default:
+                       break;
+               }
+
+               if (SPE_OP_PKT_IS_LDST_SVE(payload)) {
+                       /* SVE effective vector length */
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
+                                              SPE_OP_PKG_SVE_EVL(payload));
+
+                       if (payload & SPE_OP_PKT_SVE_PRED)
+                               arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
+                       if (payload & SPE_OP_PKT_SVE_SG)
+                               arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG");
+               }
+               break;
+       case SPE_OP_PKT_HDR_CLASS_BR_ERET:
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, "B");
+
+               if (payload & SPE_OP_PKT_COND)
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND");
+
+               if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload))
+                       arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND");
+
+               break;
+       default:
+               /* Unknown index */
+               err = -1;
+               break;
+       }
+
+       return err;
+}
+
+static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
+                                char *buf, size_t buf_len)
+{
+       int ns, el, idx = packet->index;
+       int ch, pat;
+       u64 payload = packet->payload;
+       int err = 0;
+
+       switch (idx) {
+       case SPE_ADDR_PKT_HDR_INDEX_INS:
+       case SPE_ADDR_PKT_HDR_INDEX_BRANCH:
+               ns = !!SPE_ADDR_PKT_GET_NS(payload);
+               el = SPE_ADDR_PKT_GET_EL(payload);
+               payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+               arm_spe_pkt_out_string(&err, &buf, &buf_len,
+                               "%s 0x%llx el%d ns=%d",
+                               (idx == 1) ? "TGT" : "PC", payload, el, ns);
+               break;
+       case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT:
+               arm_spe_pkt_out_string(&err, &buf, &buf_len,
+                                      "VA 0x%llx", payload);
+               break;
+       case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS:
+               ns = !!SPE_ADDR_PKT_GET_NS(payload);
+               ch = !!SPE_ADDR_PKT_GET_CH(payload);
+               pat = SPE_ADDR_PKT_GET_PAT(payload);
+               payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+               arm_spe_pkt_out_string(&err, &buf, &buf_len,
+                                      "PA 0x%llx ns=%d ch=%d pat=%x",
+                                      payload, ns, ch, pat);
+               break;
+       default:
+               /* Unknown index */
+               err = -1;
+               break;
+       }
+
+       return err;
+}
+
+static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet,
+                                   char *buf, size_t buf_len)
+{
+       u64 payload = packet->payload;
+       const char *name = arm_spe_pkt_name(packet->type);
+       int err = 0;
+
+       arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s %d ", name,
+                              (unsigned short)payload);
+
+       switch (packet->index) {
+       case SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT:
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT");
+               break;
+       case SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT:
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE");
+               break;
+       case SPE_CNT_PKT_HDR_INDEX_TRANS_LAT:
+               arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT");
+               break;
+       default:
+               break;
+       }
+
+       return err;
+}
+
 int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
                     size_t buf_len)
 {
-       int ret, ns, el, idx = packet->index;
+       int idx = packet->index;
        unsigned long long payload = packet->payload;
        const char *name = arm_spe_pkt_name(packet->type);
+       char *buf_orig = buf;
+       size_t blen = buf_len;
+       int err = 0;
 
        switch (packet->type) {
        case ARM_SPE_BAD:
        case ARM_SPE_PAD:
        case ARM_SPE_END:
-               return snprintf(buf, buf_len, "%s", name);
-       case ARM_SPE_EVENTS: {
-               size_t blen = buf_len;
-
-               ret = 0;
-               ret = snprintf(buf, buf_len, "EV");
-               buf += ret;
-               blen -= ret;
-               if (payload & 0x1) {
-                       ret = snprintf(buf, buf_len, " EXCEPTION-GEN");
-                       buf += ret;
-                       blen -= ret;
-               }
-               if (payload & 0x2) {
-                       ret = snprintf(buf, buf_len, " RETIRED");
-                       buf += ret;
-                       blen -= ret;
-               }
-               if (payload & 0x4) {
-                       ret = snprintf(buf, buf_len, " L1D-ACCESS");
-                       buf += ret;
-                       blen -= ret;
-               }
-               if (payload & 0x8) {
-                       ret = snprintf(buf, buf_len, " L1D-REFILL");
-                       buf += ret;
-                       blen -= ret;
-               }
-               if (payload & 0x10) {
-                       ret = snprintf(buf, buf_len, " TLB-ACCESS");
-                       buf += ret;
-                       blen -= ret;
-               }
-               if (payload & 0x20) {
-                       ret = snprintf(buf, buf_len, " TLB-REFILL");
-                       buf += ret;
-                       blen -= ret;
-               }
-               if (payload & 0x40) {
-                       ret = snprintf(buf, buf_len, " NOT-TAKEN");
-                       buf += ret;
-                       blen -= ret;
-               }
-               if (payload & 0x80) {
-                       ret = snprintf(buf, buf_len, " MISPRED");
-                       buf += ret;
-                       blen -= ret;
-               }
-               if (idx > 1) {
-                       if (payload & 0x100) {
-                               ret = snprintf(buf, buf_len, " LLC-ACCESS");
-                               buf += ret;
-                               blen -= ret;
-                       }
-                       if (payload & 0x200) {
-                               ret = snprintf(buf, buf_len, " LLC-REFILL");
-                               buf += ret;
-                               blen -= ret;
-                       }
-                       if (payload & 0x400) {
-                               ret = snprintf(buf, buf_len, " REMOTE-ACCESS");
-                               buf += ret;
-                               blen -= ret;
-                       }
-               }
-               if (ret < 0)
-                       return ret;
-               blen -= ret;
-               return buf_len - blen;
-       }
+               arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name);
+               break;
+       case ARM_SPE_EVENTS:
+               err = arm_spe_pkt_desc_event(packet, buf, buf_len);
+               break;
        case ARM_SPE_OP_TYPE:
-               switch (idx) {
-               case 0: return snprintf(buf, buf_len, "%s", payload & 0x1 ?
-                                       "COND-SELECT" : "INSN-OTHER");
-               case 1: {
-                       size_t blen = buf_len;
-
-                       if (payload & 0x1)
-                               ret = snprintf(buf, buf_len, "ST");
-                       else
-                               ret = snprintf(buf, buf_len, "LD");
-                       buf += ret;
-                       blen -= ret;
-                       if (payload & 0x2) {
-                               if (payload & 0x4) {
-                                       ret = snprintf(buf, buf_len, " AT");
-                                       buf += ret;
-                                       blen -= ret;
-                               }
-                               if (payload & 0x8) {
-                                       ret = snprintf(buf, buf_len, " EXCL");
-                                       buf += ret;
-                                       blen -= ret;
-                               }
-                               if (payload & 0x10) {
-                                       ret = snprintf(buf, buf_len, " AR");
-                                       buf += ret;
-                                       blen -= ret;
-                               }
-                       } else if (payload & 0x4) {
-                               ret = snprintf(buf, buf_len, " SIMD-FP");
-                               buf += ret;
-                               blen -= ret;
-                       }
-                       if (ret < 0)
-                               return ret;
-                       blen -= ret;
-                       return buf_len - blen;
-               }
-               case 2: {
-                       size_t blen = buf_len;
-
-                       ret = snprintf(buf, buf_len, "B");
-                       buf += ret;
-                       blen -= ret;
-                       if (payload & 0x1) {
-                               ret = snprintf(buf, buf_len, " COND");
-                               buf += ret;
-                               blen -= ret;
-                       }
-                       if (payload & 0x2) {
-                               ret = snprintf(buf, buf_len, " IND");
-                               buf += ret;
-                               blen -= ret;
-                       }
-                       if (ret < 0)
-                               return ret;
-                       blen -= ret;
-                       return buf_len - blen;
-                       }
-               default: return 0;
-               }
+               err = arm_spe_pkt_desc_op_type(packet, buf, buf_len);
+               break;
        case ARM_SPE_DATA_SOURCE:
        case ARM_SPE_TIMESTAMP:
-               return snprintf(buf, buf_len, "%s %lld", name, payload);
+               arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload);
+               break;
        case ARM_SPE_ADDRESS:
-               switch (idx) {
-               case 0:
-               case 1: ns = !!(packet->payload & NS_FLAG);
-                       el = (packet->payload & EL_FLAG) >> 61;
-                       payload &= ~(0xffULL << 56);
-                       return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d",
-                                       (idx == 1) ? "TGT" : "PC", payload, el, ns);
-               case 2: return snprintf(buf, buf_len, "VA 0x%llx", payload);
-               case 3: ns = !!(packet->payload & NS_FLAG);
-                       payload &= ~(0xffULL << 56);
-                       return snprintf(buf, buf_len, "PA 0x%llx ns=%d",
-                                       payload, ns);
-               default: return 0;
-               }
+               err = arm_spe_pkt_desc_addr(packet, buf, buf_len);
+               break;
        case ARM_SPE_CONTEXT:
-               return snprintf(buf, buf_len, "%s 0x%lx el%d", name,
-                               (unsigned long)payload, idx + 1);
-       case ARM_SPE_COUNTER: {
-               size_t blen = buf_len;
-
-               ret = snprintf(buf, buf_len, "%s %d ", name,
-                              (unsigned short)payload);
-               buf += ret;
-               blen -= ret;
-               switch (idx) {
-               case 0: ret = snprintf(buf, buf_len, "TOT"); break;
-               case 1: ret = snprintf(buf, buf_len, "ISSUE"); break;
-               case 2: ret = snprintf(buf, buf_len, "XLAT"); break;
-               default: ret = 0;
-               }
-               if (ret < 0)
-                       return ret;
-               blen -= ret;
-               return buf_len - blen;
-       }
+               arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d",
+                                      name, (unsigned long)payload, idx + 1);
+               break;
+       case ARM_SPE_COUNTER:
+               err = arm_spe_pkt_desc_counter(packet, buf, buf_len);
+               break;
        default:
+               /* Unknown packet type */
+               err = -1;
                break;
        }
 
-       return snprintf(buf, buf_len, "%s 0x%llx (%d)",
-                       name, payload, packet->index);
+       /* Output raw data if any error was detected */
+       if (err) {
+               err = 0;
+               arm_spe_pkt_out_string(&err, &buf_orig, &buf_len, "%s 0x%llx (%d)",
+                                      name, payload, packet->index);
+       }
+
+       return err;
 }
index 4c87052..9b970e7 100644 (file)
@@ -36,19 +36,115 @@ struct arm_spe_pkt {
        uint64_t                payload;
 };
 
-#define SPE_ADDR_PKT_HDR_INDEX_INS             (0x0)
-#define SPE_ADDR_PKT_HDR_INDEX_BRANCH          (0x1)
-#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT       (0x2)
-#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS       (0x3)
-
-#define SPE_ADDR_PKT_NS                                BIT(7)
-#define SPE_ADDR_PKT_CH                                BIT(6)
-#define SPE_ADDR_PKT_EL_OFFSET                 (5)
-#define SPE_ADDR_PKT_EL_MASK                   (0x3 << SPE_ADDR_PKT_EL_OFFSET)
-#define SPE_ADDR_PKT_EL0                       (0)
-#define SPE_ADDR_PKT_EL1                       (1)
-#define SPE_ADDR_PKT_EL2                       (2)
-#define SPE_ADDR_PKT_EL3                       (3)
+/* Short header (HEADER0) and extended header (HEADER1) */
+#define SPE_HEADER0_PAD                                0x0
+#define SPE_HEADER0_END                                0x1
+#define SPE_HEADER0_TIMESTAMP                  0x71
+/* Mask for event & data source */
+#define SPE_HEADER0_MASK1                      (GENMASK_ULL(7, 6) | GENMASK_ULL(3, 0))
+#define SPE_HEADER0_EVENTS                     0x42
+#define SPE_HEADER0_SOURCE                     0x43
+/* Mask for context & operation */
+#define SPE_HEADER0_MASK2                      GENMASK_ULL(7, 2)
+#define SPE_HEADER0_CONTEXT                    0x64
+#define SPE_HEADER0_OP_TYPE                    0x48
+/* Mask for extended format */
+#define SPE_HEADER0_EXTENDED                   0x20
+/* Mask for address & counter */
+#define SPE_HEADER0_MASK3                      GENMASK_ULL(7, 3)
+#define SPE_HEADER0_ADDRESS                    0xb0
+#define SPE_HEADER0_COUNTER                    0x98
+#define SPE_HEADER1_ALIGNMENT                  0x0
+
+#define SPE_HDR_SHORT_INDEX(h)                 ((h) & GENMASK_ULL(2, 0))
+#define SPE_HDR_EXTENDED_INDEX(h0, h1)         (((h0) & GENMASK_ULL(1, 0)) << 3 | \
+                                                SPE_HDR_SHORT_INDEX(h1))
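
A small sketch of how the two index fragments combine; the raw header bytes are hypothetical (an extended-format counter packet):

/* Standalone sketch: the extended format widens the 3-bit short index
 * with two extra bits from header byte 0.
 */
#include <assert.h>

int main(void)
{
	unsigned char h0 = 0x21, h1 = 0x99;	/* hypothetical raw header bytes */

	assert((h0 & 0xfc) == 0x20);	/* SPE_HEADER0_EXTENDED */
	assert((h1 & 0xf8) == 0x98);	/* SPE_HEADER0_COUNTER  */
	/* SPE_HDR_EXTENDED_INDEX: (1 << 3) | 1 == 9 */
	assert((((h0 & 0x3) << 3) | (h1 & 0x7)) == 9);
	return 0;
}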
+
+/* Address packet header */
+#define SPE_ADDR_PKT_HDR_INDEX_INS             0x0
+#define SPE_ADDR_PKT_HDR_INDEX_BRANCH          0x1
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT       0x2
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS       0x3
+
+/* Address packet payload */
+#define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT          56
+#define SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(v)     ((v) & GENMASK_ULL(55, 0))
+#define SPE_ADDR_PKT_ADDR_GET_BYTE_6(v)                (((v) & GENMASK_ULL(55, 48)) >> 48)
+
+#define SPE_ADDR_PKT_GET_NS(v)                 (((v) & BIT_ULL(63)) >> 63)
+#define SPE_ADDR_PKT_GET_EL(v)                 (((v) & GENMASK_ULL(62, 61)) >> 61)
+#define SPE_ADDR_PKT_GET_CH(v)                 (((v) & BIT_ULL(62)) >> 62)
+#define SPE_ADDR_PKT_GET_PAT(v)                        (((v) & GENMASK_ULL(59, 56)) >> 56)
+
+#define SPE_ADDR_PKT_EL0                       0
+#define SPE_ADDR_PKT_EL1                       1
+#define SPE_ADDR_PKT_EL2                       2
+#define SPE_ADDR_PKT_EL3                       3
+
+/* Context packet header */
+#define SPE_CTX_PKT_HDR_INDEX(h)               ((h) & GENMASK_ULL(1, 0))
+
+/* Counter packet header */
+#define SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT                0x0
+#define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT                0x1
+#define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT                0x2
+
+/* Event packet payload */
+enum arm_spe_events {
+       EV_EXCEPTION_GEN        = 0,
+       EV_RETIRED              = 1,
+       EV_L1D_ACCESS           = 2,
+       EV_L1D_REFILL           = 3,
+       EV_TLB_ACCESS           = 4,
+       EV_TLB_WALK             = 5,
+       EV_NOT_TAKEN            = 6,
+       EV_MISPRED              = 7,
+       EV_LLC_ACCESS           = 8,
+       EV_LLC_MISS             = 9,
+       EV_REMOTE_ACCESS        = 10,
+       EV_ALIGNMENT            = 11,
+       EV_PARTIAL_PREDICATE    = 17,
+       EV_EMPTY_PREDICATE      = 18,
+};
+
+/* Operation packet header */
+#define SPE_OP_PKT_HDR_CLASS(h)                        ((h) & GENMASK_ULL(1, 0))
+#define SPE_OP_PKT_HDR_CLASS_OTHER             0x0
+#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC      0x1
+#define SPE_OP_PKT_HDR_CLASS_BR_ERET           0x2
+
+#define SPE_OP_PKT_IS_OTHER_SVE_OP(v)          (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+
+#define SPE_OP_PKT_COND                                BIT(0)
+
+#define SPE_OP_PKT_LDST_SUBCLASS_GET(v)                ((v) & GENMASK_ULL(7, 1))
+#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG                0x0
+#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP       0x4
+#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG    0x10
+#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG     0x30
+
+#define SPE_OP_PKT_IS_LDST_ATOMIC(v)           (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
+
+#define SPE_OP_PKT_AR                          BIT(4)
+#define SPE_OP_PKT_EXCL                                BIT(3)
+#define SPE_OP_PKT_AT                          BIT(2)
+#define SPE_OP_PKT_ST                          BIT(0)
+
+#define SPE_OP_PKT_IS_LDST_SVE(v)              (((v) & (BIT(3) | BIT(1))) == 0x8)
+
+#define SPE_OP_PKT_SVE_SG                      BIT(7)
+/*
+ * SVE effective vector length (EVL) is stored in byte 0 bits [6:4];
+ * the encoded length is 32 scaled by a power of two, so the EVL
+ * calculation is:
+ *
+ *   32 * (2 ^ bits [6:4]) = 32 << (bits [6:4])
+ */
+#define SPE_OP_PKG_SVE_EVL(v)                  (32 << (((v) & GENMASK_ULL(6, 4)) >> 4))
+#define SPE_OP_PKT_SVE_PRED                    BIT(2)
+#define SPE_OP_PKT_SVE_FP                      BIT(1)
+
+#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v)       (((v) & GENMASK_ULL(7, 1)) == 0x2)
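
As a quick check of the EVL formula documented above SPE_OP_PKG_SVE_EVL(); the payload byte is hypothetical:

/* Standalone sketch: decode the SVE EVL from bits [6:4] of the payload. */
#include <assert.h>

int main(void)
{
	unsigned long long payload = 0x30;	/* hypothetical: bits [6:4] = 3 */

	/* 32 * 2^3 = 256-bit effective vector length */
	assert((32 << ((payload & 0x70) >> 4)) == 256);
	return 0;
}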
 
 const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
 
index 3882a53..8901a16 100644 (file)
@@ -113,7 +113,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
                if (ret > 0) {
                        ret = arm_spe_pkt_desc(&packet, desc,
                                               ARM_SPE_PKT_DESC_MAX);
-                       if (ret > 0)
+                       if (!ret)
                                color_fprintf(stdout, color, " %s\n", desc);
                } else {
                        color_fprintf(stdout, color, " Bad packet!\n");
index 42a85c8..a608784 100644 (file)
@@ -62,9 +62,7 @@
  * Make a group from 'leader' to 'last', requiring that the events were not
  * already grouped to a different leader.
  */
-static int perf_evlist__regroup(struct evlist *evlist,
-                               struct evsel *leader,
-                               struct evsel *last)
+static int evlist__regroup(struct evlist *evlist, struct evsel *leader, struct evsel *last)
 {
        struct evsel *evsel;
        bool grp;
@@ -658,8 +656,7 @@ int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
                if (evsel->core.attr.type == itr->pmu->type) {
                        if (evsel->disabled)
                                return 0;
-                       return perf_evlist__enable_event_idx(itr->evlist, evsel,
-                                                            idx);
+                       return evlist__enable_event_idx(itr->evlist, evsel, idx);
                }
        }
        return -EINVAL;
@@ -776,7 +773,7 @@ no_opt:
                        evsel->core.attr.aux_sample_size = term->val.aux_sample_size;
                        /* If possible, group with the AUX event */
                        if (aux_evsel && evsel->core.attr.aux_sample_size)
-                               perf_evlist__regroup(evlist, aux_evsel, evsel);
+                               evlist__regroup(evlist, aux_evsel, evsel);
                }
        }
 
@@ -1017,7 +1014,7 @@ struct auxtrace_queue *auxtrace_queues__sample_queue(struct auxtrace_queues *que
        if (!id)
                return NULL;
 
-       sid = perf_evlist__id2sid(session->evlist, id);
+       sid = evlist__id2sid(session->evlist, id);
        if (!sid)
                return NULL;
 
@@ -1047,7 +1044,7 @@ int auxtrace_queues__add_sample(struct auxtrace_queues *queues,
        if (!id)
                return -EINVAL;
 
-       sid = perf_evlist__id2sid(session->evlist, id);
+       sid = evlist__id2sid(session->evlist, id);
        if (!sid)
                return -ENOENT;
 
@@ -1082,7 +1079,7 @@ static int auxtrace_queue_data_cb(struct perf_session *session,
        if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE)
                return 0;
 
-       err = perf_evlist__parse_sample(session->evlist, event, &sample);
+       err = evlist__parse_sample(session->evlist, event, &sample);
        if (err)
                return err;
 
@@ -1333,6 +1330,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
        synth_opts->flc = true;
        synth_opts->llc = true;
        synth_opts->tlb = true;
+       synth_opts->mem = true;
        synth_opts->remote_access = true;
 
        if (no_sample) {
@@ -1554,6 +1552,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
                case 'a':
                        synth_opts->remote_access = true;
                        break;
+               case 'M':
+                       synth_opts->mem = true;
+                       break;
                case 'q':
                        synth_opts->quick += 1;
                        break;
index 951d2d1..7e5c9e1 100644 (file)
@@ -88,6 +88,7 @@ enum itrace_period_type {
  * @llc: whether to synthesize last level cache events
  * @tlb: whether to synthesize TLB events
  * @remote_access: whether to synthesize remote access events
+ * @mem: whether to synthesize memory events
  * @callchain_sz: maximum callchain size
  * @last_branch_sz: branch context size
  * @period: 'instructions' events period
@@ -126,6 +127,7 @@ struct itrace_synth_opts {
        bool                    llc;
        bool                    tlb;
        bool                    remote_access;
+       bool                    mem;
        unsigned int            callchain_sz;
        unsigned int            last_branch_sz;
        unsigned long long      period;
index 3742511..57d58c8 100644 (file)
@@ -526,7 +526,7 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
         */
        attr.wakeup_watermark = 1;
 
-       return perf_evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env);
+       return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env);
 }
 
 void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
index 0374adc..9087f1b 100644 (file)
@@ -1058,12 +1058,11 @@ __bpf_map__config_event(struct bpf_map *map,
                        struct parse_events_term *term,
                        struct evlist *evlist)
 {
-       struct evsel *evsel;
        const struct bpf_map_def *def;
        struct bpf_map_op *op;
        const char *map_name = bpf_map__name(map);
+       struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
 
-       evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
        if (!evsel) {
                pr_debug("Event (for '%s') '%s' doesn't exist\n",
                         map_name, term->val.str);
index 25251d6..5d1c725 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/compiler.h>
 #include <linux/err.h>
+
+#ifdef HAVE_LIBBPF_SUPPORT
 #include <bpf/libbpf.h>
 
 enum bpf_loader_errno {
@@ -38,6 +40,7 @@ enum bpf_loader_errno {
        BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG,  /* Index too large */
        __BPF_LOADER_ERRNO__END,
 };
+#endif // HAVE_LIBBPF_SUPPORT
 
 struct evsel;
 struct evlist;
index 6b410c3..02df36b 100644 (file)
@@ -37,6 +37,7 @@
 
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
+#include <linux/string.h>
 #include <asm/bug.h>
 
 static bool no_buildid_cache;
@@ -260,10 +261,9 @@ static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso,
            "debug" : "elf"));
 }
 
-char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
-                            bool is_debug)
+char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+                              bool is_debug, bool is_kallsyms)
 {
-       bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
        bool is_vdso = dso__is_vdso((struct dso *)dso);
        char sbuild_id[SBUILD_ID_SIZE];
        char *linkname;
@@ -292,6 +292,14 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
        return bf;
 }
 
+char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+                            bool is_debug)
+{
+       bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
+
+       return __dso__build_id_filename(dso, bf, size, is_debug, is_kallsyms);
+}
+
 #define dsos__for_each_with_build_id(pos, head)        \
        list_for_each_entry(pos, head, node)    \
                if (!pos->has_build_id)         \
@@ -663,24 +671,15 @@ out:
        return realname;
 }
 
-int build_id_cache__add_s(const char *sbuild_id, const char *name,
-                         struct nsinfo *nsi, bool is_kallsyms, bool is_vdso)
+int
+build_id_cache__add(const char *sbuild_id, const char *name, const char *realname,
+                   struct nsinfo *nsi, bool is_kallsyms, bool is_vdso)
 {
        const size_t size = PATH_MAX;
-       char *realname = NULL, *filename = NULL, *dir_name = NULL,
-            *linkname = zalloc(size), *tmp;
+       char *filename = NULL, *dir_name = NULL, *linkname = zalloc(size), *tmp;
        char *debugfile = NULL;
        int err = -1;
 
-       if (!is_kallsyms) {
-               if (!is_vdso)
-                       realname = nsinfo__realpath(name, nsi);
-               else
-                       realname = realpath(name, NULL);
-               if (!realname)
-                       goto out_free;
-       }
-
        dir_name = build_id_cache__cachedir(sbuild_id, name, nsi, is_kallsyms,
                                            is_vdso);
        if (!dir_name)
@@ -754,8 +753,25 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name,
        tmp = dir_name + strlen(buildid_dir) - 5;
        memcpy(tmp, "../..", 5);
 
-       if (symlink(tmp, linkname) == 0)
+       if (symlink(tmp, linkname) == 0) {
+               err = 0;
+       } else if (errno == EEXIST) {
+               char path[PATH_MAX];
+               ssize_t len;
+
+               len = readlink(linkname, path, sizeof(path) - 1);
+               if (len <= 0) {
+                       pr_err("Can't read link: %s\n", linkname);
+                       goto out_free;
+               }
+               path[len] = '\0';
+
+               if (strcmp(tmp, path)) {
+                       pr_debug("build <%s> already linked to %s\n",
+                                sbuild_id, linkname);
+               }
                err = 0;
+       }
 
        /* Update SDT cache : error is just warned */
        if (realname &&
@@ -763,8 +779,6 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name,
                pr_debug4("Failed to update/scan SDT cache for %s\n", realname);
 
 out_free:
-       if (!is_kallsyms)
-               free(realname);
        free(filename);
        free(debugfile);
        free(dir_name);
@@ -772,6 +786,29 @@ out_free:
        return err;
 }
 
+int build_id_cache__add_s(const char *sbuild_id, const char *name,
+                         struct nsinfo *nsi, bool is_kallsyms, bool is_vdso)
+{
+       char *realname = NULL;
+       int err = -1;
+
+       if (!is_kallsyms) {
+               if (!is_vdso)
+                       realname = nsinfo__realpath(name, nsi);
+               else
+                       realname = realpath(name, NULL);
+               if (!realname)
+                       goto out_free;
+       }
+
+       err = build_id_cache__add(sbuild_id, name, realname, nsi, is_kallsyms, is_vdso);
+
+out_free:
+       if (!is_kallsyms)
+               free(realname);
+       return err;
+}
+
 static int build_id_cache__add_b(const struct build_id *bid,
                                 const char *name, struct nsinfo *nsi,
                                 bool is_kallsyms, bool is_vdso)
@@ -834,12 +871,16 @@ out_free:
        return err;
 }
 
-static int dso__cache_build_id(struct dso *dso, struct machine *machine)
+static int dso__cache_build_id(struct dso *dso, struct machine *machine,
+                              void *priv __maybe_unused)
 {
        bool is_kallsyms = dso__is_kallsyms(dso);
        bool is_vdso = dso__is_vdso(dso);
        const char *name = dso->long_name;
 
+       if (!dso->has_build_id)
+               return 0;
+
        if (dso__is_kcore(dso)) {
                is_kallsyms = true;
                name = machine->mmap_name;
@@ -848,43 +889,36 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine)
                                     is_kallsyms, is_vdso);
 }
 
-static int __dsos__cache_build_ids(struct list_head *head,
-                                  struct machine *machine)
+static int
+machines__for_each_dso(struct machines *machines, machine__dso_t fn, void *priv)
 {
-       struct dso *pos;
-       int err = 0;
-
-       dsos__for_each_with_build_id(pos, head)
-               if (dso__cache_build_id(pos, machine))
-                       err = -1;
+       int ret = machine__for_each_dso(&machines->host, fn, priv);
+       struct rb_node *nd;
 
-       return err;
-}
+       for (nd = rb_first_cached(&machines->guests); nd;
+            nd = rb_next(nd)) {
+               struct machine *pos = rb_entry(nd, struct machine, rb_node);
 
-static int machine__cache_build_ids(struct machine *machine)
-{
-       return __dsos__cache_build_ids(&machine->dsos.head, machine);
+               ret |= machine__for_each_dso(pos, fn, priv);
+       }
+       return ret ? -1 : 0;
 }
 
-int perf_session__cache_build_ids(struct perf_session *session)
+int __perf_session__cache_build_ids(struct perf_session *session,
+                                   machine__dso_t fn, void *priv)
 {
-       struct rb_node *nd;
-       int ret;
-
        if (no_buildid_cache)
                return 0;
 
        if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
                return -1;
 
-       ret = machine__cache_build_ids(&session->machines.host);
+       return machines__for_each_dso(&session->machines, fn, priv) ?  -1 : 0;
+}
 
-       for (nd = rb_first_cached(&session->machines.guests); nd;
-            nd = rb_next(nd)) {
-               struct machine *pos = rb_entry(nd, struct machine, rb_node);
-               ret |= machine__cache_build_ids(pos);
-       }
-       return ret ? -1 : 0;
+int perf_session__cache_build_ids(struct perf_session *session)
+{
+       return __perf_session__cache_build_ids(session, dso__cache_build_id, NULL);
 }
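
The callback passed above has the machine__dso_t shape introduced in this change: it is invoked once per DSO, across the host machine and every guest. As a hedged illustration, a fragment against perf's internal headers (not a standalone program) with a hypothetical counting callback:

/* Hypothetical machine__dso_t callback: counts DSOs when passed to
 * __perf_session__cache_build_ids(); a non-zero return makes the walk
 * report failure.
 */
static int count_dso(struct dso *dso __maybe_unused,
		     struct machine *machine __maybe_unused, void *priv)
{
	(*(int *)priv)++;
	return 0;
}

/* usage: int n = 0; __perf_session__cache_build_ids(session, count_dso, &n); */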
 
 static bool machine__read_build_ids(struct machine *machine, bool with_hits)
@@ -912,3 +946,8 @@ void build_id__init(struct build_id *bid, const u8 *data, size_t size)
        memcpy(bid->data, data, size);
        bid->size = size;
 }
+
+bool build_id__is_defined(const struct build_id *bid)
+{
+       return bid && bid->size ? !!memchr_inv(bid->data, 0, bid->size) : false;
+}
index f293f99..02613f4 100644 (file)
@@ -5,6 +5,7 @@
 #define BUILD_ID_SIZE  20
 #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1)
 
+#include "machine.h"
 #include "tool.h"
 #include <linux/types.h>
 
@@ -21,6 +22,7 @@ struct feat_fd;
 
 void build_id__init(struct build_id *bid, const u8 *data, size_t size);
 int build_id__sprintf(const struct build_id *build_id, char *bf);
+bool build_id__is_defined(const struct build_id *bid);
 int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
 int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
 char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
@@ -28,6 +30,8 @@ char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
 
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
                             bool is_debug);
+char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+                              bool is_debug, bool is_kallsyms);
 
 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
                           struct perf_sample *sample, struct evsel *evsel,
@@ -43,6 +47,8 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits);
 int perf_session__write_buildid_table(struct perf_session *session,
                                      struct feat_fd *fd);
 int perf_session__cache_build_ids(struct perf_session *session);
+int __perf_session__cache_build_ids(struct perf_session *session,
+                                   machine__dso_t fn, void *priv);
 
 char *build_id_cache__origname(const char *sbuild_id);
 char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size);
@@ -57,6 +63,8 @@ char *build_id_cache__complement(const char *incomplete_sbuild_id);
 int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi,
                                   struct strlist **result);
 bool build_id_cache__cached(const char *sbuild_id);
+int build_id_cache__add(const char *sbuild_id, const char *name, const char *realname,
+                       struct nsinfo *nsi, bool is_kallsyms, bool is_vdso);
 int build_id_cache__add_s(const char *sbuild_id,
                          const char *name, struct nsinfo *nsi,
                          bool is_kallsyms, bool is_vdso);
index b81324a..5dff7e4 100644 (file)
 #include <stdlib.h>
 #include <string.h>
 #include <api/fs/fs.h>
+#include <ftw.h>
+#include <regex.h>
 
 int nr_cgroups;
 
+/* used to match cgroup name with patterns */
+struct cgroup_name {
+       struct list_head list;
+       bool used;
+       char name[];
+};
+static LIST_HEAD(cgroup_list);
+
 static int open_cgroup(const char *name)
 {
        char path[PATH_MAX + 1];
@@ -149,6 +159,137 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup)
                evsel__set_default_cgroup(evsel, cgroup);
 }
 
+/* helper function for ftw() in match_cgroups and list_cgroups */
+static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused,
+                          int typeflag)
+{
+       struct cgroup_name *cn;
+
+       if (typeflag != FTW_D)
+               return 0;
+
+       cn = malloc(sizeof(*cn) + strlen(fpath) + 1);
+       if (cn == NULL)
+               return -1;
+
+       cn->used = false;
+       strcpy(cn->name, fpath);
+
+       list_add_tail(&cn->list, &cgroup_list);
+       return 0;
+}
+
+static void release_cgroup_list(void)
+{
+       struct cgroup_name *cn;
+
+       while (!list_empty(&cgroup_list)) {
+               cn = list_first_entry(&cgroup_list, struct cgroup_name, list);
+               list_del(&cn->list);
+               free(cn);
+       }
+}
+
+/* collect given cgroups only */
+static int list_cgroups(const char *str)
+{
+       const char *p, *e, *eos = str + strlen(str);
+       struct cgroup_name *cn;
+       char *s;
+
+       /* use the given names as-is, for testing purposes */
+       for (;;) {
+               p = strchr(str, ',');
+               e = p ? p : eos;
+
+               if (e - str) {
+                       int ret;
+
+                       s = strndup(str, e - str);
+                       if (!s)
+                               return -1;
+                       /* pretend it was added by ftw() */
+                       ret = add_cgroup_name(s, NULL, FTW_D);
+                       free(s);
+                       if (ret)
+                               return -1;
+               } else {
+                       if (add_cgroup_name("", NULL, FTW_D) < 0)
+                               return -1;
+               }
+
+               if (!p)
+                       break;
+               str = p+1;
+       }
+
+       /* all of the listed groups will be used */
+       list_for_each_entry(cn, &cgroup_list, list)
+               cn->used = true;
+
+       return 0;
+}
+
+/* collect all cgroups first and then match with the pattern */
+static int match_cgroups(const char *str)
+{
+       char mnt[PATH_MAX];
+       const char *p, *e, *eos = str + strlen(str);
+       struct cgroup_name *cn;
+       regex_t reg;
+       int prefix_len;
+       char *s;
+
+       if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event"))
+               return -1;
+
+       /* cgroup_name will have a full path, skip the root directory */
+       prefix_len = strlen(mnt);
+
+       /* collect all cgroups in the cgroup_list */
+       if (ftw(mnt, add_cgroup_name, 20) < 0)
+               return -1;
+
+       for (;;) {
+               p = strchr(str, ',');
+               e = p ? p : eos;
+
+               /* allow empty cgroups, i.e., skip */
+               if (e - str) {
+                       /* strndup() adds the NUL termination */
+                       s = strndup(str, e - str);
+                       if (!s)
+                               return -1;
+                       if (regcomp(&reg, s, REG_NOSUB)) {
+                               free(s);
+                               return -1;
+                       }
+
+                       /* check cgroup name with the pattern */
+                       list_for_each_entry(cn, &cgroup_list, list) {
+                               char *name = cn->name + prefix_len;
+
+                               if (name[0] == '/' && name[1])
+                                       name++;
+                               if (!regexec(&reg, name, 0, NULL, 0))
+                                       cn->used = true;
+                       }
+                       regfree(&reg);
+                       free(s);
+               } else {
+                       /* an empty pattern selects the first entry: the root cgroup */
+                       cn = list_first_entry(&cgroup_list, struct cgroup_name,
+                                             list);
+                       cn->used = true;
+               }
+
+               if (!p)
+                       break;
+               str = p+1;
+       }
+       return prefix_len;
+}
+
 int parse_cgroups(const struct option *opt, const char *str,
                  int unset __maybe_unused)
 {
@@ -201,6 +342,11 @@ int parse_cgroups(const struct option *opt, const char *str,
        return 0;
 }
 
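+/* does the string contain glob/regex metacharacters? */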
+static bool has_pattern_string(const char *str)
+{
+       return !!strpbrk(str, "{}[]()|*+?^$");
+}
+
 int evlist__expand_cgroup(struct evlist *evlist, const char *str,
                          struct rblist *metric_events, bool open_cgroup)
 {
@@ -208,8 +354,9 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
        struct evsel *pos, *evsel, *leader;
        struct rblist orig_metric_events;
        struct cgroup *cgrp = NULL;
-       const char *p, *e, *eos = str + strlen(str);
+       struct cgroup_name *cn;
        int ret = -1;
+       int prefix_len;
 
        if (evlist->core.nr_entries == 0) {
                fprintf(stderr, "must define events before cgroups\n");
@@ -224,7 +371,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
        }
 
        /* save original events and init evlist */
-       perf_evlist__splice_list_tail(orig_list, &evlist->core.entries);
+       evlist__splice_list_tail(orig_list, &evlist->core.entries);
        evlist->core.nr_entries = 0;
 
        if (metric_events) {
@@ -234,24 +381,27 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
                rblist__init(&orig_metric_events);
        }
 
-       for (;;) {
-               p = strchr(str, ',');
-               e = p ? p : eos;
+       if (has_pattern_string(str))
+               prefix_len = match_cgroups(str);
+       else
+               prefix_len = list_cgroups(str);
 
-               /* allow empty cgroups, i.e., skip */
-               if (e - str) {
-                       /* termination added */
-                       char *name = strndup(str, e - str);
-                       if (!name)
-                               goto out_err;
+       if (prefix_len < 0)
+               goto out_err;
 
-                       cgrp = cgroup__new(name, open_cgroup);
-                       free(name);
-                       if (cgrp == NULL)
-                               goto out_err;
-               } else {
-                       cgrp = NULL;
-               }
+       list_for_each_entry(cn, &cgroup_list, list) {
+               char *name;
+
+               if (!cn->used)
+                       continue;
+
+               /* cgroup_name might have a full path, skip the prefix */
+               name = cn->name + prefix_len;
+               if (name[0] == '/' && name[1])
+                       name++;
+               cgrp = cgroup__new(name, open_cgroup);
+               if (cgrp == NULL)
+                       goto out_err;
 
                leader = NULL;
                evlist__for_each_entry(orig_list, pos) {
@@ -277,23 +427,25 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
                        if (metricgroup__copy_metric_events(tmp_list, cgrp,
                                                            metric_events,
                                                            &orig_metric_events) < 0)
-                               break;
+                               goto out_err;
                }
 
-               perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries);
+               evlist__splice_list_tail(evlist, &tmp_list->core.entries);
                tmp_list->core.nr_entries = 0;
+       }
 
-               if (!p) {
-                       ret = 0;
-                       break;
-               }
-               str = p+1;
+       if (list_empty(&evlist->core.entries)) {
+               fprintf(stderr, "no cgroup matched: %s\n", str);
+               goto out_err;
        }
 
+       ret = 0;
+
 out_err:
        evlist__delete(orig_list);
        evlist__delete(tmp_list);
        rblist__exit(&orig_metric_events);
+       release_cgroup_list();
 
        return ret;
 }
index c47aa34..f29af4f 100644 (file)
@@ -174,8 +174,21 @@ static bool check_pipe(struct perf_data *data)
                        is_pipe = true;
        }
 
-       if (is_pipe)
-               data->file.fd = fd;
+       if (is_pipe) {
+               if (data->use_stdio) {
+                       const char *mode;
+
+                       mode = perf_data__is_read(data) ? "r" : "w";
+                       data->file.fptr = fdopen(fd, mode);
+
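+                       /* fall back to the raw fd if fdopen() fails */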
+                       if (data->file.fptr == NULL) {
+                               data->file.fd = fd;
+                               data->use_stdio = false;
+                       }
+               } else {
+                       data->file.fd = fd;
+               }
+       }
 
        return data->is_pipe = is_pipe;
 }
@@ -334,6 +347,9 @@ int perf_data__open(struct perf_data *data)
        if (check_pipe(data))
                return 0;
 
+       /* stdio mode is currently supported for pipes only */
+       data->use_stdio = false;
+
        if (!data->path)
                data->path = "perf.data";
 
@@ -353,7 +369,21 @@ void perf_data__close(struct perf_data *data)
                perf_data__close_dir(data);
 
        zfree(&data->file.path);
-       close(data->file.fd);
+
+       if (data->use_stdio)
+               fclose(data->file.fptr);
+       else
+               close(data->file.fd);
+}
+
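+/* read 'size' bytes from the data file, via stdio or the raw fd */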
+ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size)
+{
+       if (data->use_stdio) {
+               if (fread(buf, size, 1, data->file.fptr) == 1)
+                       return size;
+               return feof(data->file.fptr) ? 0 : -1;
+       }
+       return readn(data->file.fd, buf, size);
 }
 
 ssize_t perf_data_file__write(struct perf_data_file *file,
@@ -365,6 +395,11 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
 ssize_t perf_data__write(struct perf_data *data,
                              void *buf, size_t size)
 {
+       if (data->use_stdio) {
+               if (fwrite(buf, size, 1, data->file.fptr) == 1)
+                       return size;
+               return -1;
+       }
        return perf_data_file__write(&data->file, buf, size);
 }
 
@@ -457,3 +492,22 @@ char *perf_data__kallsyms_name(struct perf_data *data)
 
        return kallsyms_name;
 }
+
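+/* check whether the file at 'path' starts with the perf data file magic number */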
+bool is_perf_data(const char *path)
+{
+       bool ret = false;
+       FILE *file;
+       u64 magic;
+
+       file = fopen(path, "r");
+       if (!file)
+               return false;
+
+       if (fread(&magic, 1, 8, file) < 8)
+               goto out;
+
+       ret = is_perf_magic(magic);
+out:
+       fclose(file);
+       return ret;
+}
index 75947ef..62a3e66 100644 (file)
@@ -2,6 +2,7 @@
 #ifndef __PERF_DATA_H
 #define __PERF_DATA_H
 
+#include <stdio.h>
 #include <stdbool.h>
 
 enum perf_data_mode {
@@ -16,7 +17,10 @@ enum perf_dir_version {
 
 struct perf_data_file {
        char            *path;
-       int              fd;
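+       /* a raw fd or a stdio stream, selected by perf_data.use_stdio */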
+       union {
+               int      fd;
+               FILE    *fptr;
+       };
        unsigned long    size;
 };
 
@@ -26,6 +30,7 @@ struct perf_data {
        bool                     is_pipe;
        bool                     is_dir;
        bool                     force;
+       bool                     use_stdio;
        enum perf_data_mode      mode;
 
        struct {
@@ -62,11 +67,15 @@ static inline bool perf_data__is_single_file(struct perf_data *data)
 
 static inline int perf_data__fd(struct perf_data *data)
 {
+       if (data->use_stdio)
+               return fileno(data->file.fptr);
+
        return data->file.fd;
 }
 
 int perf_data__open(struct perf_data *data);
 void perf_data__close(struct perf_data *data);
+ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size);
 ssize_t perf_data__write(struct perf_data *data,
                              void *buf, size_t size);
 ssize_t perf_data_file__write(struct perf_data_file *file,
@@ -89,4 +98,5 @@ int perf_data__update_dir(struct perf_data *data);
 unsigned long perf_data__size(struct perf_data *data);
 int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
 char *perf_data__kallsyms_name(struct perf_data *data);
+bool is_perf_data(const char *path);
 #endif /* __PERF_DATA_H */
index 5cda556..50fd6a4 100644 (file)
@@ -30,6 +30,12 @@ bool dump_trace = false, quiet = false;
 int debug_ordered_events;
 static int redirect_to_stderr;
 int debug_data_convert;
+static FILE *debug_file;
+
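+/* debug output sink; perf_debug_setup() points it at stderr by default */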
+void debug_set_file(FILE *file)
+{
+       debug_file = file;
+}
 
 int veprintf(int level, int var, const char *fmt, va_list args)
 {
@@ -39,7 +45,7 @@ int veprintf(int level, int var, const char *fmt, va_list args)
                if (use_browser >= 1 && !redirect_to_stderr)
                        ui_helpline__vshow(fmt, args);
                else
-                       ret = vfprintf(stderr, fmt, args);
+                       ret = vfprintf(debug_file, fmt, args);
        }
 
        return ret;
@@ -227,6 +233,7 @@ DEBUG_WRAPPER(debug, 1);
 
 void perf_debug_setup(void)
 {
+       debug_set_file(stderr);
        libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
 }
 
index f1734ab..43f7122 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <stdarg.h>
 #include <stdbool.h>
+#include <stdio.h>
 #include <linux/compiler.h>
 
 extern int verbose;
@@ -62,6 +63,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5)
 int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
+void debug_set_file(FILE *file);
 void perf_debug_setup(void);
 int perf_quiet_option(void);
 
index 55c11e8..d786cf6 100644 (file)
 #include <errno.h>
 #include <fcntl.h>
 #include <stdlib.h>
+#ifdef HAVE_LIBBPF_SUPPORT
 #include <bpf/libbpf.h>
 #include "bpf-event.h"
+#endif
 #include "compress.h"
 #include "env.h"
 #include "namespaces.h"
@@ -277,18 +279,12 @@ bool dso__needs_decompress(struct dso *dso)
                dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
 }
 
-static int decompress_kmodule(struct dso *dso, const char *name,
-                             char *pathname, size_t len)
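+/* decompress 'name' into a temp file and return an open fd; errors land in *err */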
+int filename__decompress(const char *name, char *pathname,
+                        size_t len, int comp, int *err)
 {
        char tmpbuf[] = KMOD_DECOMP_NAME;
        int fd = -1;
 
-       if (!dso__needs_decompress(dso))
-               return -1;
-
-       if (dso->comp == COMP_ID__NONE)
-               return -1;
-
        /*
         * We have a proper compression id and yet the file behind
         * 'name' can still be a plain uncompressed object.
@@ -302,17 +298,17 @@ static int decompress_kmodule(struct dso *dso, const char *name,
         * To keep this transparent, we detect this and return the file
         * descriptor to the uncompressed file.
         */
-       if (!compressions[dso->comp].is_compressed(name))
+       if (!compressions[comp].is_compressed(name))
                return open(name, O_RDONLY);
 
        fd = mkstemp(tmpbuf);
        if (fd < 0) {
-               dso->load_errno = errno;
+               *err = errno;
                return -1;
        }
 
-       if (compressions[dso->comp].decompress(name, fd)) {
-               dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
+       if (compressions[comp].decompress(name, fd)) {
+               *err = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
                close(fd);
                fd = -1;
        }
@@ -326,6 +322,19 @@ static int decompress_kmodule(struct dso *dso, const char *name,
        return fd;
 }
 
+static int decompress_kmodule(struct dso *dso, const char *name,
+                             char *pathname, size_t len)
+{
+       if (!dso__needs_decompress(dso))
+               return -1;
+
+       if (dso->comp == COMP_ID__NONE)
+               return -1;
+
+       return filename__decompress(name, pathname, len, dso->comp,
+                                   &dso->load_errno);
+}
+
 int dso__decompress_kmodule_fd(struct dso *dso, const char *name)
 {
        return decompress_kmodule(dso, name, NULL, 0);
@@ -728,6 +737,7 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
        return false;
 }
 
+#ifdef HAVE_LIBBPF_SUPPORT
 static ssize_t bpf_read(struct dso *dso, u64 offset, char *data)
 {
        struct bpf_prog_info_node *node;
@@ -765,6 +775,7 @@ static int bpf_size(struct dso *dso)
        dso->data.file_size = node->info_linear->info.jited_prog_len;
        return 0;
 }
+#endif // HAVE_LIBBPF_SUPPORT
 
 static void
 dso_cache__free(struct dso *dso)
@@ -894,10 +905,12 @@ static struct dso_cache *dso_cache__populate(struct dso *dso,
                *ret = -ENOMEM;
                return NULL;
        }
-
+#ifdef HAVE_LIBBPF_SUPPORT
        if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
                *ret = bpf_read(dso, cache_offset, cache->data);
-       else if (dso->binary_type == DSO_BINARY_TYPE__OOL)
+       else
+#endif
+       if (dso->binary_type == DSO_BINARY_TYPE__OOL)
                *ret = DSO__DATA_CACHE_SIZE;
        else
                *ret = file_read(dso, machine, cache_offset, cache->data);
@@ -1018,10 +1031,10 @@ int dso__data_file_size(struct dso *dso, struct machine *machine)
 
        if (dso->data.status == DSO_DATA_STATUS_ERROR)
                return -1;
-
+#ifdef HAVE_LIBBPF_SUPPORT
        if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
                return bpf_size(dso);
-
+#endif
        return file_size(dso, machine);
 }
 
index d8cb4f5..cd2fe64 100644 (file)
@@ -274,6 +274,8 @@ bool dso__needs_decompress(struct dso *dso);
 int dso__decompress_kmodule_fd(struct dso *dso, const char *name);
 int dso__decompress_kmodule_path(struct dso *dso, const char *name,
                                 char *pathname, size_t len);
+int filename__decompress(const char *name, char *pathname,
+                        size_t len, int comp, int *err);
 
 #define KMOD_DECOMP_NAME  "/tmp/perf-kmod-XXXXXX"
 #define KMOD_DECOMP_LEN   sizeof(KMOD_DECOMP_NAME)
index fadc597..9130f6f 100644 (file)
@@ -5,16 +5,18 @@
 #include "util/header.h"
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
-#include "bpf-event.h"
 #include "cgroup.h"
 #include <errno.h>
 #include <sys/utsname.h>
-#include <bpf/libbpf.h>
 #include <stdlib.h>
 #include <string.h>
 
 struct perf_env perf_env;
 
+#ifdef HAVE_LIBBPF_SUPPORT
+#include "bpf-event.h"
+#include <bpf/libbpf.h>
+
 void perf_env__insert_bpf_prog_info(struct perf_env *env,
                                    struct bpf_prog_info_node *info_node)
 {
@@ -163,6 +165,11 @@ static void perf_env__purge_bpf(struct perf_env *env)
 
        up_write(&env->bpf_progs.lock);
 }
+#else // HAVE_LIBBPF_SUPPORT
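+/* no-op stub so that perf_env__exit() builds without libbpf */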
+static void perf_env__purge_bpf(struct perf_env *env __maybe_unused)
+{
+}
+#endif // HAVE_LIBBPF_SUPPORT
 
 void perf_env__exit(struct perf_env *env)
 {
@@ -197,11 +204,13 @@ void perf_env__exit(struct perf_env *env)
        zfree(&env->memory_nodes);
 }
 
-void perf_env__init(struct perf_env *env)
+void perf_env__init(struct perf_env *env __maybe_unused)
 {
+#ifdef HAVE_LIBBPF_SUPPORT
        env->bpf_progs.infos = RB_ROOT;
        env->bpf_progs.btfs = RB_ROOT;
        init_rwsem(&env->bpf_progs.lock);
+#endif
 }
 
 int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
index a129726..ca249bf 100644 (file)
@@ -77,7 +77,7 @@ struct perf_env {
        struct numa_node        *numa_nodes;
        struct memory_node      *memory_nodes;
        unsigned long long       memory_bsize;
-
+#ifdef HAVE_LIBBPF_SUPPORT
        /*
         * bpf_info_lock protects bpf rbtrees. This is needed because the
         * trees are accessed by different threads in perf-top
@@ -89,7 +89,7 @@ struct perf_env {
                struct rb_root          btfs;
                u32                     btfs_cnt;
        } bpf_progs;
-
+#endif // HAVE_LIBBPF_SUPPORT
        /* same reason as above (for perf-top) */
        struct {
                struct rw_semaphore     lock;
index b828b99..ff403ea 100644 (file)
@@ -135,6 +135,7 @@ struct perf_sample {
        u32 raw_size;
        u64 data_src;
        u64 phys_addr;
+       u64 data_page_size;
        u64 cgroup;
        u32 flags;
        u16 insn_len;
@@ -408,4 +409,7 @@ extern int sysctl_perf_event_max_stack;
 extern int sysctl_perf_event_max_contexts_per_stack;
 extern unsigned int proc_map_timeout;
 
+#define PAGE_SIZE_NAME_LEN     32
+char *get_page_size_name(u64 size, char *str);
+
 #endif /* __PERF_RECORD_H */
index 8bdf3d2..05363a7 100644 (file)
@@ -78,7 +78,7 @@ struct evlist *evlist__new(void)
        return evlist;
 }
 
-struct evlist *perf_evlist__new_default(void)
+struct evlist *evlist__new_default(void)
 {
        struct evlist *evlist = evlist__new();
 
@@ -90,7 +90,7 @@ struct evlist *perf_evlist__new_default(void)
        return evlist;
 }
 
-struct evlist *perf_evlist__new_dummy(void)
+struct evlist *evlist__new_dummy(void)
 {
        struct evlist *evlist = evlist__new();
 
@@ -103,13 +103,13 @@ struct evlist *perf_evlist__new_dummy(void)
 }
 
 /**
- * perf_evlist__set_id_pos - set the positions of event ids.
+ * evlist__set_id_pos - set the positions of event ids.
  * @evlist: selected event list
  *
  * Events with compatible sample types all have the same id_pos
  * and is_pos.  For convenience, put a copy on evlist.
  */
-void perf_evlist__set_id_pos(struct evlist *evlist)
+void evlist__set_id_pos(struct evlist *evlist)
 {
        struct evsel *first = evlist__first(evlist);
 
@@ -117,14 +117,14 @@ void perf_evlist__set_id_pos(struct evlist *evlist)
        evlist->is_pos = first->is_pos;
 }
 
-static void perf_evlist__update_id_pos(struct evlist *evlist)
+static void evlist__update_id_pos(struct evlist *evlist)
 {
        struct evsel *evsel;
 
        evlist__for_each_entry(evlist, evsel)
                evsel__calc_id_pos(evsel);
 
-       perf_evlist__set_id_pos(evlist);
+       evlist__set_id_pos(evlist);
 }
 
 static void evlist__purge(struct evlist *evlist)
@@ -168,7 +168,7 @@ void evlist__add(struct evlist *evlist, struct evsel *entry)
        perf_evlist__add(&evlist->core, &entry->core);
 
        if (evlist->core.nr_entries == 1)
-               perf_evlist__set_id_pos(evlist);
+               evlist__set_id_pos(evlist);
 }
 
 void evlist__remove(struct evlist *evlist, struct evsel *evsel)
@@ -177,27 +177,36 @@ void evlist__remove(struct evlist *evlist, struct evsel *evsel)
        perf_evlist__remove(&evlist->core, &evsel->core);
 }
 
-void perf_evlist__splice_list_tail(struct evlist *evlist,
-                                  struct list_head *list)
+void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list)
 {
-       struct evsel *evsel, *temp;
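+       /* move each group leader over first, then pull in its group members */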
+       while (!list_empty(list)) {
+               struct evsel *evsel, *temp, *leader = NULL;
 
-       __evlist__for_each_entry_safe(list, temp, evsel) {
-               list_del_init(&evsel->core.node);
-               evlist__add(evlist, evsel);
+               __evlist__for_each_entry_safe(list, temp, evsel) {
+                       list_del_init(&evsel->core.node);
+                       evlist__add(evlist, evsel);
+                       leader = evsel;
+                       break;
+               }
+
+               __evlist__for_each_entry_safe(list, temp, evsel) {
+                       if (evsel->leader == leader) {
+                               list_del_init(&evsel->core.node);
+                               evlist__add(evlist, evsel);
+                       }
+               }
        }
 }
 
 int __evlist__set_tracepoints_handlers(struct evlist *evlist,
                                       const struct evsel_str_handler *assocs, size_t nr_assocs)
 {
-       struct evsel *evsel;
        size_t i;
        int err;
 
        for (i = 0; i < nr_assocs; i++) {
                // If the event for this handler is not in the evlist, just ignore it.
-               evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name);
+               struct evsel *evsel = evlist__find_tracepoint_by_name(evlist, assocs[i].name);
                if (evsel == NULL)
                        continue;
 
@@ -212,7 +221,7 @@ out:
        return err;
 }
 
-void __perf_evlist__set_leader(struct list_head *list)
+void __evlist__set_leader(struct list_head *list)
 {
        struct evsel *evsel, *leader;
 
@@ -226,11 +235,11 @@ void __perf_evlist__set_leader(struct list_head *list)
        }
 }
 
-void perf_evlist__set_leader(struct evlist *evlist)
+void evlist__set_leader(struct evlist *evlist)
 {
        if (evlist->core.nr_entries) {
                evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
-               __perf_evlist__set_leader(&evlist->core.entries);
+               __evlist__set_leader(&evlist->core.entries);
        }
 }
 
@@ -274,7 +283,7 @@ static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attr
                list_add_tail(&evsel->core.node, &head);
        }
 
-       perf_evlist__splice_list_tail(evlist, &head);
+       evlist__splice_list_tail(evlist, &head);
 
        return 0;
 
@@ -294,8 +303,7 @@ int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *a
        return evlist__add_attrs(evlist, attrs, nr_attrs);
 }
 
-struct evsel *
-perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
+struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
 {
        struct evsel *evsel;
 
@@ -308,9 +316,7 @@ perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
        return NULL;
 }
 
-struct evsel *
-perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
-                                    const char *name)
+struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name)
 {
        struct evsel *evsel;
 
@@ -335,8 +341,7 @@ int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name,
        return 0;
 }
 
-static int perf_evlist__nr_threads(struct evlist *evlist,
-                                  struct evsel *evsel)
+static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel)
 {
        if (evsel->core.system_wide)
                return 1;
@@ -376,7 +381,30 @@ bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
        return true;
 }
 
-void evlist__disable(struct evlist *evlist)
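+/* a NULL evsel_name matches any evsel; dummy events never match by name */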
+static int evsel__strcmp(struct evsel *pos, char *evsel_name)
+{
+       if (!evsel_name)
+               return 0;
+       if (evsel__is_dummy_event(pos))
+               return 1;
+       return strcmp(pos->name, evsel_name);
+}
+
+static int evlist__is_enabled(struct evlist *evlist)
+{
+       struct evsel *pos;
+
+       evlist__for_each_entry(evlist, pos) {
+               if (!evsel__is_group_leader(pos) || !pos->core.fd)
+                       continue;
+               /* If at least one event is enabled, evlist is enabled. */
+               if (!pos->disabled)
+                       return true;
+       }
+       return false;
+}
+
+static void __evlist__disable(struct evlist *evlist, char *evsel_name)
 {
        struct evsel *pos;
        struct affinity affinity;
@@ -392,6 +420,8 @@ void evlist__disable(struct evlist *evlist)
                        affinity__set(&affinity, cpu);
 
                        evlist__for_each_entry(evlist, pos) {
+                               if (evsel__strcmp(pos, evsel_name))
+                                       continue;
                                if (evsel__cpu_iter_skip(pos, cpu))
                                        continue;
                                if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
@@ -409,15 +439,34 @@ void evlist__disable(struct evlist *evlist)
 
        affinity__cleanup(&affinity);
        evlist__for_each_entry(evlist, pos) {
+               if (evsel__strcmp(pos, evsel_name))
+                       continue;
                if (!evsel__is_group_leader(pos) || !pos->core.fd)
                        continue;
                pos->disabled = true;
        }
 
-       evlist->enabled = false;
+       /*
+        * If we disabled only a single event, we need to recompute
+        * the enabled state of the evlist manually.
+        */
+       if (evsel_name)
+               evlist->enabled = evlist__is_enabled(evlist);
+       else
+               evlist->enabled = false;
 }
 
-void evlist__enable(struct evlist *evlist)
+void evlist__disable(struct evlist *evlist)
+{
+       __evlist__disable(evlist, NULL);
+}
+
+void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
+{
+       __evlist__disable(evlist, evsel_name);
+}
+
+static void __evlist__enable(struct evlist *evlist, char *evsel_name)
 {
        struct evsel *pos;
        struct affinity affinity;
@@ -430,6 +479,8 @@ void evlist__enable(struct evlist *evlist)
                affinity__set(&affinity, cpu);
 
                evlist__for_each_entry(evlist, pos) {
+                       if (evsel__strcmp(pos, evsel_name))
+                               continue;
                        if (evsel__cpu_iter_skip(pos, cpu))
                                continue;
                        if (!evsel__is_group_leader(pos) || !pos->core.fd)
@@ -439,24 +490,40 @@ void evlist__enable(struct evlist *evlist)
        }
        affinity__cleanup(&affinity);
        evlist__for_each_entry(evlist, pos) {
+               if (evsel__strcmp(pos, evsel_name))
+                       continue;
                if (!evsel__is_group_leader(pos) || !pos->core.fd)
                        continue;
                pos->disabled = false;
        }
 
+       /*
+        * Enabling even a single event marks the evlist as 'enabled',
+        * so that a later toggle works properly and can move it back
+        * to the 'disabled' state.
+        */
        evlist->enabled = true;
 }
 
-void perf_evlist__toggle_enable(struct evlist *evlist)
+void evlist__enable(struct evlist *evlist)
+{
+       __evlist__enable(evlist, NULL);
+}
+
+void evlist__enable_evsel(struct evlist *evlist, char *evsel_name)
+{
+       __evlist__enable(evlist, evsel_name);
+}
+
+void evlist__toggle_enable(struct evlist *evlist)
 {
        (evlist->enabled ? evlist__disable : evlist__enable)(evlist);
 }
 
-static int perf_evlist__enable_event_cpu(struct evlist *evlist,
-                                        struct evsel *evsel, int cpu)
+static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu)
 {
        int thread;
-       int nr_threads = perf_evlist__nr_threads(evlist, evsel);
+       int nr_threads = evlist__nr_threads(evlist, evsel);
 
        if (!evsel->core.fd)
                return -EINVAL;
@@ -469,9 +536,7 @@ static int perf_evlist__enable_event_cpu(struct evlist *evlist,
        return 0;
 }
 
-static int perf_evlist__enable_event_thread(struct evlist *evlist,
-                                           struct evsel *evsel,
-                                           int thread)
+static int evlist__enable_event_thread(struct evlist *evlist, struct evsel *evsel, int thread)
 {
        int cpu;
        int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
@@ -487,15 +552,14 @@ static int perf_evlist__enable_event_thread(struct evlist *evlist,
        return 0;
 }
 
-int perf_evlist__enable_event_idx(struct evlist *evlist,
-                                 struct evsel *evsel, int idx)
+int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
 {
        bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);
 
        if (per_cpu_mmaps)
-               return perf_evlist__enable_event_cpu(evlist, evsel, idx);
-       else
-               return perf_evlist__enable_event_thread(evlist, evsel, idx);
+               return evlist__enable_event_cpu(evlist, evsel, idx);
+
+       return evlist__enable_event_thread(evlist, evsel, idx);
 }
 
 int evlist__add_pollfd(struct evlist *evlist, int fd)
@@ -513,7 +577,7 @@ int evlist__poll(struct evlist *evlist, int timeout)
        return perf_evlist__poll(&evlist->core, timeout);
 }
 
-struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
+struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id)
 {
        struct hlist_head *head;
        struct perf_sample_id *sid;
@@ -529,14 +593,14 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
        return NULL;
 }
 
-struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
+struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id)
 {
        struct perf_sample_id *sid;
 
        if (evlist->core.nr_entries == 1 || !id)
                return evlist__first(evlist);
 
-       sid = perf_evlist__id2sid(evlist, id);
+       sid = evlist__id2sid(evlist, id);
        if (sid)
                return container_of(sid->evsel, struct evsel, core);
 
@@ -546,23 +610,21 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
        return NULL;
 }
 
-struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
-                                               u64 id)
+struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id)
 {
        struct perf_sample_id *sid;
 
        if (!id)
                return NULL;
 
-       sid = perf_evlist__id2sid(evlist, id);
+       sid = evlist__id2sid(evlist, id);
        if (sid)
                return container_of(sid->evsel, struct evsel, core);
 
        return NULL;
 }
 
-static int perf_evlist__event2id(struct evlist *evlist,
-                                union perf_event *event, u64 *id)
+static int evlist__event2id(struct evlist *evlist, union perf_event *event, u64 *id)
 {
        const __u64 *array = event->sample.array;
        ssize_t n;
@@ -582,8 +644,7 @@ static int perf_evlist__event2id(struct evlist *evlist,
        return 0;
 }
 
-struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
-                                           union perf_event *event)
+struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event)
 {
        struct evsel *first = evlist__first(evlist);
        struct hlist_head *head;
@@ -598,7 +659,7 @@ struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
            event->header.type != PERF_RECORD_SAMPLE)
                return first;
 
-       if (perf_evlist__event2id(evlist, event, &id))
+       if (evlist__event2id(evlist, event, &id))
                return NULL;
 
        /* Synthesized events have an id of zero */
@@ -615,7 +676,7 @@ struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
        return NULL;
 }
 
-static int perf_evlist__set_paused(struct evlist *evlist, bool value)
+static int evlist__set_paused(struct evlist *evlist, bool value)
 {
        int i;
 
@@ -635,14 +696,14 @@ static int perf_evlist__set_paused(struct evlist *evlist, bool value)
        return 0;
 }
 
-static int perf_evlist__pause(struct evlist *evlist)
+static int evlist__pause(struct evlist *evlist)
 {
-       return perf_evlist__set_paused(evlist, true);
+       return evlist__set_paused(evlist, true);
 }
 
-static int perf_evlist__resume(struct evlist *evlist)
+static int evlist__resume(struct evlist *evlist)
 {
-       return perf_evlist__set_paused(evlist, false);
+       return evlist__set_paused(evlist, false);
 }
 
 static void evlist__munmap_nofree(struct evlist *evlist)
@@ -727,7 +788,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
                if (overwrite) {
                        evlist->overwrite_mmap = maps;
                        if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
-                               perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+                               evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
                } else {
                        evlist->mmap = maps;
                }
@@ -827,7 +888,7 @@ static long parse_pages_arg(const char *str, unsigned long min,
        return pages;
 }
 
-int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
+int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
 {
        unsigned long max = UINT_MAX;
        long pages;
@@ -845,10 +906,9 @@ int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
        return 0;
 }
 
-int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
-                                 int unset __maybe_unused)
+int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused)
 {
-       return __perf_evlist__parse_mmap_pages(opt->value, str);
+       return __evlist__parse_mmap_pages(opt->value, str);
 }
 
 /**
@@ -904,7 +964,7 @@ int evlist__mmap(struct evlist *evlist, unsigned int pages)
        return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
 }
 
-int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
+int evlist__create_maps(struct evlist *evlist, struct target *target)
 {
        bool all_threads = (target->per_thread && target->system_wide);
        struct perf_cpu_map *cpus;
@@ -957,25 +1017,7 @@ out_delete_threads:
        return -1;
 }
 
-void __perf_evlist__set_sample_bit(struct evlist *evlist,
-                                  enum perf_event_sample_format bit)
-{
-       struct evsel *evsel;
-
-       evlist__for_each_entry(evlist, evsel)
-               __evsel__set_sample_bit(evsel, bit);
-}
-
-void __perf_evlist__reset_sample_bit(struct evlist *evlist,
-                                    enum perf_event_sample_format bit)
-{
-       struct evsel *evsel;
-
-       evlist__for_each_entry(evlist, evsel)
-               __evsel__reset_sample_bit(evsel, bit);
-}
-
-int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
 {
        struct evsel *evsel;
        int err = 0;
@@ -998,7 +1040,7 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
        return err;
 }
 
-int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
+int evlist__set_tp_filter(struct evlist *evlist, const char *filter)
 {
        struct evsel *evsel;
        int err = 0;
@@ -1018,7 +1060,7 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
        return err;
 }
 
-int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
+int evlist__append_tp_filter(struct evlist *evlist, const char *filter)
 {
        struct evsel *evsel;
        int err = 0;
@@ -1064,32 +1106,32 @@ out_free:
        return NULL;
 }
 
-int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
 {
        char *filter = asprintf__tp_filter_pids(npids, pids);
-       int ret = perf_evlist__set_tp_filter(evlist, filter);
+       int ret = evlist__set_tp_filter(evlist, filter);
 
        free(filter);
        return ret;
 }
 
-int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
+int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
 {
-       return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
+       return evlist__set_tp_filter_pids(evlist, 1, &pid);
 }
 
-int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
 {
        char *filter = asprintf__tp_filter_pids(npids, pids);
-       int ret = perf_evlist__append_tp_filter(evlist, filter);
+       int ret = evlist__append_tp_filter(evlist, filter);
 
        free(filter);
        return ret;
 }
 
-int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
+int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
 {
-       return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
+       return evlist__append_tp_filter_pids(evlist, 1, &pid);
 }
 
 bool evlist__valid_sample_type(struct evlist *evlist)
@@ -1140,7 +1182,7 @@ u64 evlist__combined_branch_type(struct evlist *evlist)
        return branch_type;
 }
 
-bool perf_evlist__valid_read_format(struct evlist *evlist)
+bool evlist__valid_read_format(struct evlist *evlist)
 {
        struct evsel *first = evlist__first(evlist), *pos = first;
        u64 read_format = first->core.attr.read_format;
@@ -1162,7 +1204,7 @@ bool perf_evlist__valid_read_format(struct evlist *evlist)
        return true;
 }
 
-u16 perf_evlist__id_hdr_size(struct evlist *evlist)
+u16 evlist__id_hdr_size(struct evlist *evlist)
 {
        struct evsel *first = evlist__first(evlist);
        struct perf_sample *data;
@@ -1213,8 +1255,7 @@ bool evlist__sample_id_all(struct evlist *evlist)
        return first->core.attr.sample_id_all;
 }
 
-void perf_evlist__set_selected(struct evlist *evlist,
-                              struct evsel *evsel)
+void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
 {
        evlist->selected = evsel;
 }
@@ -1253,7 +1294,7 @@ void evlist__close(struct evlist *evlist)
        }
 }
 
-static int perf_evlist__create_syswide_maps(struct evlist *evlist)
+static int evlist__create_syswide_maps(struct evlist *evlist)
 {
        struct perf_cpu_map *cpus;
        struct perf_thread_map *threads;
@@ -1295,12 +1336,12 @@ int evlist__open(struct evlist *evlist)
         * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
         */
        if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
-               err = perf_evlist__create_syswide_maps(evlist);
+               err = evlist__create_syswide_maps(evlist);
                if (err < 0)
                        goto out_err;
        }
 
-       perf_evlist__update_id_pos(evlist);
+       evlist__update_id_pos(evlist);
 
        evlist__for_each_entry(evlist, evsel) {
                err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
@@ -1315,9 +1356,8 @@ out_err:
        return err;
 }
 
-int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
-                                 const char *argv[], bool pipe_output,
-                                 void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
+int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[],
+                            bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
 {
        int child_ready_pipe[2], go_pipe[2];
        char bf;
@@ -1362,7 +1402,7 @@ int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
                /*
                 * The parent will ask for the execvp() to be performed by
                 * writing exactly one byte, in workload.cork_fd, usually via
-                * perf_evlist__start_workload().
+                * evlist__start_workload().
                 *
                 * For cancelling the workload without actually running it,
                 * the parent will just close workload.cork_fd, without writing
@@ -1429,7 +1469,7 @@ out_close_ready_pipe:
        return -1;
 }
 
-int perf_evlist__start_workload(struct evlist *evlist)
+int evlist__start_workload(struct evlist *evlist)
 {
        if (evlist->workload.cork_fd > 0) {
                char bf = 0;
@@ -1448,21 +1488,18 @@ int perf_evlist__start_workload(struct evlist *evlist)
        return 0;
 }
 
-int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
-                             struct perf_sample *sample)
+int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
 {
-       struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
+       struct evsel *evsel = evlist__event2evsel(evlist, event);
 
        if (!evsel)
                return -EFAULT;
        return evsel__parse_sample(evsel, event, sample);
 }
 
-int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
-                                       union perf_event *event,
-                                       u64 *timestamp)
+int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp)
 {
-       struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
+       struct evsel *evsel = evlist__event2evsel(evlist, event);
 
        if (!evsel)
                return -EFAULT;
@@ -1553,8 +1590,7 @@ int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size
        return 0;
 }
 
-void perf_evlist__to_front(struct evlist *evlist,
-                          struct evsel *move_evsel)
+void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel)
 {
        struct evsel *evsel, *n;
        LIST_HEAD(move);
@@ -1570,7 +1606,7 @@ void perf_evlist__to_front(struct evlist *evlist,
        list_splice(&move, &evlist->core.entries);
 }
 
-struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist)
+struct evsel *evlist__get_tracking_event(struct evlist *evlist)
 {
        struct evsel *evsel;
 
@@ -1582,8 +1618,7 @@ struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist)
        return evlist__first(evlist);
 }
 
-void perf_evlist__set_tracking_event(struct evlist *evlist,
-                                    struct evsel *tracking_evsel)
+void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel)
 {
        struct evsel *evsel;
 
@@ -1598,9 +1633,7 @@ void perf_evlist__set_tracking_event(struct evlist *evlist,
        tracking_evsel->tracking = true;
 }
 
-struct evsel *
-perf_evlist__find_evsel_by_str(struct evlist *evlist,
-                              const char *str)
+struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str)
 {
        struct evsel *evsel;
 
@@ -1614,8 +1647,7 @@ perf_evlist__find_evsel_by_str(struct evlist *evlist,
        return NULL;
 }
 
-void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
-                                 enum bkw_mmap_state state)
+void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state)
 {
        enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
        enum action {
@@ -1658,10 +1690,10 @@ void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
 
        switch (action) {
        case PAUSE:
-               perf_evlist__pause(evlist);
+               evlist__pause(evlist);
                break;
        case RESUME:
-               perf_evlist__resume(evlist);
+               evlist__resume(evlist);
                break;
        case NONE:
        default:
@@ -1672,7 +1704,7 @@ state_err:
        return;
 }
 
-bool perf_evlist__exclude_kernel(struct evlist *evlist)
+bool evlist__exclude_kernel(struct evlist *evlist)
 {
        struct evsel *evsel;
 
@@ -1689,19 +1721,17 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist)
  * the group display. Set the artificial group and set the leader's
  * forced_leader flag to notify the display code.
  */
-void perf_evlist__force_leader(struct evlist *evlist)
+void evlist__force_leader(struct evlist *evlist)
 {
        if (!evlist->nr_groups) {
                struct evsel *leader = evlist__first(evlist);
 
-               perf_evlist__set_leader(evlist);
+               evlist__set_leader(evlist);
                leader->forced_leader = true;
        }
 }
 
-struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
-                                                struct evsel *evsel,
-                                               bool close)
+struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *evsel, bool close)
 {
        struct evsel *c2, *leader;
        bool is_open = true;
index e1a4503..1aae758 100644 (file)
@@ -87,8 +87,8 @@ struct evsel_str_handler {
 };
 
 struct evlist *evlist__new(void);
-struct evlist *perf_evlist__new_default(void);
-struct evlist *perf_evlist__new_dummy(void);
+struct evlist *evlist__new_default(void);
+struct evlist *evlist__new_dummy(void);
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
                  struct perf_thread_map *threads);
 void evlist__exit(struct evlist *evlist);
@@ -112,14 +112,11 @@ int __evlist__add_default_attrs(struct evlist *evlist,
 
 int evlist__add_dummy(struct evlist *evlist);
 
-int perf_evlist__add_sb_event(struct evlist *evlist,
-                             struct perf_event_attr *attr,
-                             evsel__sb_cb_t cb,
-                             void *data);
+int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+                        evsel__sb_cb_t cb, void *data);
 void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data);
-int perf_evlist__start_sb_thread(struct evlist *evlist,
-                                struct target *target);
-void perf_evlist__stop_sb_thread(struct evlist *evlist);
+int evlist__start_sb_thread(struct evlist *evlist, struct target *target);
+void evlist__stop_sb_thread(struct evlist *evlist);
 
 int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler);
 
@@ -130,45 +127,29 @@ int __evlist__set_tracepoints_handlers(struct evlist *evlist,
 #define evlist__set_tracepoints_handlers(evlist, array) \
        __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array))
 
-void __perf_evlist__set_sample_bit(struct evlist *evlist,
-                                  enum perf_event_sample_format bit);
-void __perf_evlist__reset_sample_bit(struct evlist *evlist,
-                                    enum perf_event_sample_format bit);
+int evlist__set_tp_filter(struct evlist *evlist, const char *filter);
+int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid);
+int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
 
-#define perf_evlist__set_sample_bit(evlist, bit) \
-       __perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit)
+int evlist__append_tp_filter(struct evlist *evlist, const char *filter);
 
-#define perf_evlist__reset_sample_bit(evlist, bit) \
-       __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit)
+int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid);
+int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
 
-int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter);
-int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid);
-int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
-
-int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter);
-
-int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid);
-int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
-
-struct evsel *
-perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id);
-
-struct evsel *
-perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
-                                    const char *name);
+struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id);
+struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name);
 
 int evlist__add_pollfd(struct evlist *evlist, int fd);
 int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask);
 
 int evlist__poll(struct evlist *evlist, int timeout);
 
-struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id);
-struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
-                                               u64 id);
+struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id);
+struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id);
 
-struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id);
+struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id);
 
-void perf_evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state);
+void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state);
 
 void evlist__mmap_consume(struct evlist *evlist, int idx);
 
@@ -177,24 +158,19 @@ void evlist__close(struct evlist *evlist);
 
 struct callchain_param;
 
-void perf_evlist__set_id_pos(struct evlist *evlist);
-void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
-                        struct callchain_param *callchain);
+void evlist__set_id_pos(struct evlist *evlist);
+void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain);
 int record_opts__config(struct record_opts *opts);
 
-int perf_evlist__prepare_workload(struct evlist *evlist,
-                                 struct target *target,
-                                 const char *argv[], bool pipe_output,
-                                 void (*exec_error)(int signo, siginfo_t *info,
-                                                    void *ucontext));
-int perf_evlist__start_workload(struct evlist *evlist);
+int evlist__prepare_workload(struct evlist *evlist, struct target *target,
+                            const char *argv[], bool pipe_output,
+                            void (*exec_error)(int signo, siginfo_t *info, void *ucontext));
+int evlist__start_workload(struct evlist *evlist);
 
 struct option;
 
-int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
-int perf_evlist__parse_mmap_pages(const struct option *opt,
-                                 const char *str,
-                                 int unset);
+int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
+int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset);
 
 unsigned long perf_event_mlock_kb_in_pages(void);
 
@@ -209,41 +185,36 @@ size_t evlist__mmap_size(unsigned long pages);
 
 void evlist__disable(struct evlist *evlist);
 void evlist__enable(struct evlist *evlist);
-void perf_evlist__toggle_enable(struct evlist *evlist);
+void evlist__toggle_enable(struct evlist *evlist);
+void evlist__disable_evsel(struct evlist *evlist, char *evsel_name);
+void evlist__enable_evsel(struct evlist *evlist, char *evsel_name);
 
-int perf_evlist__enable_event_idx(struct evlist *evlist,
-                                 struct evsel *evsel, int idx);
+int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx);
 
-void perf_evlist__set_selected(struct evlist *evlist,
-                              struct evsel *evsel);
+void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
 
-int perf_evlist__create_maps(struct evlist *evlist, struct target *target);
-int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
+int evlist__create_maps(struct evlist *evlist, struct target *target);
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
 
-void __perf_evlist__set_leader(struct list_head *list);
-void perf_evlist__set_leader(struct evlist *evlist);
+void __evlist__set_leader(struct list_head *list);
+void evlist__set_leader(struct evlist *evlist);
 
 u64 __evlist__combined_sample_type(struct evlist *evlist);
 u64 evlist__combined_sample_type(struct evlist *evlist);
 u64 evlist__combined_branch_type(struct evlist *evlist);
 bool evlist__sample_id_all(struct evlist *evlist);
-u16 perf_evlist__id_hdr_size(struct evlist *evlist);
-
-int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
-                             struct perf_sample *sample);
+u16 evlist__id_hdr_size(struct evlist *evlist);
 
-int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
-                                       union perf_event *event,
-                                       u64 *timestamp);
+int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
+int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp);
 
 bool evlist__valid_sample_type(struct evlist *evlist);
 bool evlist__valid_sample_id_all(struct evlist *evlist);
-bool perf_evlist__valid_read_format(struct evlist *evlist);
+bool evlist__valid_read_format(struct evlist *evlist);
 
-void perf_evlist__splice_list_tail(struct evlist *evlist,
-                                  struct list_head *list);
+void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list);
 
-static inline bool perf_evlist__empty(struct evlist *evlist)
+static inline bool evlist__empty(struct evlist *evlist)
 {
        return list_empty(&evlist->core.entries);
 }
@@ -265,9 +236,8 @@ static inline struct evsel *evlist__last(struct evlist *evlist)
 int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
 int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
 
-bool perf_evlist__can_select_event(struct evlist *evlist, const char *str);
-void perf_evlist__to_front(struct evlist *evlist,
-                          struct evsel *move_evsel);
+bool evlist__can_select_event(struct evlist *evlist, const char *str);
+void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
 
 /**
  * __evlist__for_each_entry - iterate through all the evsels
@@ -339,27 +309,23 @@ void perf_evlist__to_front(struct evlist *evlist,
        evlist__cpu_iter_start(evlist);                 \
        perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus)
 
-struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist);
-void perf_evlist__set_tracking_event(struct evlist *evlist,
-                                    struct evsel *tracking_evsel);
+struct evsel *evlist__get_tracking_event(struct evlist *evlist);
+void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel);
 
 void evlist__cpu_iter_start(struct evlist *evlist);
 bool evsel__cpu_iter_skip(struct evsel *ev, int cpu);
 bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu);
 
-struct evsel *
-perf_evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
+struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
+
+struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event);
 
-struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
-                                           union perf_event *event);
+bool evlist__exclude_kernel(struct evlist *evlist);
 
-bool perf_evlist__exclude_kernel(struct evlist *evlist);
+void evlist__force_leader(struct evlist *evlist);
 
-void perf_evlist__force_leader(struct evlist *evlist);
+struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evsel, bool close);
 
-struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
-                                                struct evsel *evsel,
-                                               bool close);
 #define EVLIST_CTL_CMD_ENABLE_TAG  "enable"
 #define EVLIST_CTL_CMD_DISABLE_TAG "disable"
 #define EVLIST_CTL_CMD_ACK_TAG     "ack\n"
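
The hunks above continue the tree-wide rename that drops the perf_ prefix from evlist methods. A hedged before/after sketch of a typical call site (the function names come from the prototypes above; the surrounding code is illustrative):

	/* before */
	perf_evlist__set_leader(evlist);
	if (perf_evlist__apply_filters(evlist, &failed_evsel) < 0)
		goto out;

	/* after: identical semantics, shorter names */
	evlist__set_leader(evlist);
	if (evlist__apply_filters(evlist, &failed_evsel) < 0)
		goto out;
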
index 1cad605..c26ea82 100644 (file)
@@ -497,7 +497,7 @@ static const char *__evsel__hw_name(u64 config)
        return "unknown-hardware";
 }
 
-static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
+static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
 {
        int colon = 0, r = 0;
        struct perf_event_attr *attr = &evsel->core.attr;
@@ -536,7 +536,7 @@ static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
 static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
 {
        int r = scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
-       return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+       return r + evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
 const char *evsel__sw_names[PERF_COUNT_SW_MAX] = {
@@ -562,7 +562,7 @@ static const char *__evsel__sw_name(u64 config)
 static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
 {
        int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
-       return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+       return r + evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
 static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
@@ -587,7 +587,7 @@ static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
 {
        struct perf_event_attr *attr = &evsel->core.attr;
        int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
-       return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+       return r + evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
 const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
@@ -682,13 +682,13 @@ out_err:
 static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
 {
        int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
-       return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+       return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
 }
 
 static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
 {
        int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
-       return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+       return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
 }
 
 static int evsel__tool_name(char *bf, size_t size)
@@ -850,9 +850,7 @@ void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
                return __evsel__config_callchain(evsel, opts, param);
 }
 
-static void
-perf_evsel__reset_callgraph(struct evsel *evsel,
-                           struct callchain_param *param)
+static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
 {
        struct perf_event_attr *attr = &evsel->core.attr;
 
@@ -988,7 +986,7 @@ static void evsel__apply_config_terms(struct evsel *evsel,
 
                /* If global callgraph set, clear it */
                if (callchain_param.enabled)
-                       perf_evsel__reset_callgraph(evsel, &callchain_param);
+                       evsel__reset_callgraph(evsel, &callchain_param);
 
                /* set perf-event callgraph */
                if (param.enabled) {
@@ -1190,6 +1188,9 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
                evsel__set_sample_bit(evsel, CGROUP);
        }
 
+       if (opts->sample_data_page_size)
+               evsel__set_sample_bit(evsel, DATA_PAGE_SIZE);
+
        if (opts->record_switch_events)
                attr->context_switch = track;
 
@@ -1434,9 +1435,7 @@ static int evsel__read_one(struct evsel *evsel, int cpu, int thread)
        return perf_evsel__read(&evsel->core, cpu, thread, count);
 }
 
-static void
-perf_evsel__set_count(struct evsel *counter, int cpu, int thread,
-                     u64 val, u64 ena, u64 run)
+static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run)
 {
        struct perf_counts_values *count;
 
@@ -1449,9 +1448,7 @@ perf_evsel__set_count(struct evsel *counter, int cpu, int thread,
        perf_counts__set_loaded(counter->counts, cpu, thread, true);
 }
 
-static int
-perf_evsel__process_group_data(struct evsel *leader,
-                              int cpu, int thread, u64 *data)
+static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data)
 {
        u64 read_format = leader->core.attr.read_format;
        struct sample_read_value *v;
@@ -1470,18 +1467,16 @@ perf_evsel__process_group_data(struct evsel *leader,
 
        v = (struct sample_read_value *) data;
 
-       perf_evsel__set_count(leader, cpu, thread,
-                             v[0].value, ena, run);
+       evsel__set_count(leader, cpu, thread, v[0].value, ena, run);
 
        for (i = 1; i < nr; i++) {
                struct evsel *counter;
 
-               counter = perf_evlist__id2evsel(leader->evlist, v[i].id);
+               counter = evlist__id2evsel(leader->evlist, v[i].id);
                if (!counter)
                        return -EINVAL;
 
-               perf_evsel__set_count(counter, cpu, thread,
-                                     v[i].value, ena, run);
+               evsel__set_count(counter, cpu, thread, v[i].value, ena, run);
        }
 
        return 0;
@@ -1514,7 +1509,7 @@ static int evsel__read_group(struct evsel *leader, int cpu, int thread)
        if (readn(FD(leader, cpu, thread), data, size) <= 0)
                return -errno;
 
-       return perf_evsel__process_group_data(leader, cpu, thread, data);
+       return evsel__process_group_data(leader, cpu, thread, data);
 }
 
 int evsel__read_counter(struct evsel *evsel, int cpu, int thread)
@@ -1567,9 +1562,7 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
        return fd;
 }
 
-static void perf_evsel__remove_fd(struct evsel *pos,
-                                 int nr_cpus, int nr_threads,
-                                 int thread_idx)
+static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx)
 {
        for (int cpu = 0; cpu < nr_cpus; cpu++)
                for (int thread = thread_idx; thread < nr_threads - 1; thread++)
@@ -1588,7 +1581,7 @@ static int update_fds(struct evsel *evsel,
        evlist__for_each_entry(evsel->evlist, pos) {
                nr_cpus = pos != evsel ? nr_cpus : cpu_idx;
 
-               perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
+               evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
 
                /*
                 * Since fds for next evsel have not been created,
@@ -1880,7 +1873,12 @@ try_fallback:
         * Must probe features in the order they were added to the
         * perf_event_attr interface.
         */
-        if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
+        if (!perf_missing_features.data_page_size &&
+           (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) {
+               perf_missing_features.data_page_size = true;
+               pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n");
+               goto out_close;
+       } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
                perf_missing_features.cgroup = true;
                pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
                goto out_close;
@@ -2365,6 +2363,12 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
                array++;
        }
 
+       data->data_page_size = 0;
+       if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
+               data->data_page_size = *array;
+               array++;
+       }
+
        if (type & PERF_SAMPLE_AUX) {
                OVERFLOW_CHECK_u64(array);
                sz = *array++;
@@ -2674,6 +2678,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
        "We found oprofile daemon running, please stop it and try again.");
                break;
        case EINVAL:
+               if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size)
+                       return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel.");
                if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
                        return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
                if (perf_missing_features.clockid)
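
Taken together, the evsel.c hunks wire up the new PERF_SAMPLE_DATA_PAGE_SIZE sample bit end to end: evsel__config() sets it when requested, the open fallback probes kernels that predate it, evsel__parse_sample() consumes the field, and evsel__open_strerror() explains the resulting EINVAL. A minimal standalone sketch of requesting the field directly via perf_event_open(), assuming a kernel new enough to know the bit (ring-buffer reading elided):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		struct perf_event_attr attr = {
			.type          = PERF_TYPE_HARDWARE,
			.config        = PERF_COUNT_HW_CPU_CYCLES,
			.size          = sizeof(attr),
			.sample_period = 100000,
			.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
					 PERF_SAMPLE_DATA_PAGE_SIZE,
		};
		/* Older kernels fail this with EINVAL, which is exactly what
		 * the perf_missing_features.data_page_size probe detects. */
		int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

		return fd < 0 ? 1 : 0;
	}
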
index 79a860d..cd1d8dd 100644 (file)
@@ -144,6 +144,7 @@ struct perf_missing_features {
        bool aux_output;
        bool branch_hw_idx;
        bool cgroup;
+       bool data_page_size;
 };
 
 extern struct perf_missing_features perf_missing_features;
index 3ba72f7..40cb56a 100644 (file)
@@ -41,7 +41,7 @@ static int evswitch__fprintf_enoent(FILE *fp, const char *evtype, const char *ev
 int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp)
 {
        if (evswitch->on_name) {
-               evswitch->on = perf_evlist__find_evsel_by_str(evlist, evswitch->on_name);
+               evswitch->on = evlist__find_evsel_by_str(evlist, evswitch->on_name);
                if (evswitch->on == NULL) {
                        evswitch__fprintf_enoent(fp, "on", evswitch->on_name);
                        return -ENOENT;
@@ -50,7 +50,7 @@ int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp)
        }
 
        if (evswitch->off_name) {
-               evswitch->off = perf_evlist__find_evsel_by_str(evlist, evswitch->off_name);
+               evswitch->off = evlist__find_evsel_by_str(evlist, evswitch->off_name);
                if (evswitch->off == NULL) {
                        evswitch__fprintf_enoent(fp, "off", evswitch->off_name);
                        return -ENOENT;
index 53482ef..a850fd0 100644 (file)
 extern int expr_debug;
 #endif
 
+struct expr_id_data {
+       union {
+               double val;
+               struct {
+                       double val;
+                       const char *metric_name;
+                       const char *metric_expr;
+               } ref;
+               struct expr_id  *parent;
+       };
+
+       enum {
+               /* Holding a double value. */
+               EXPR_ID_DATA__VALUE,
+               /* Reference to another metric. */
+               EXPR_ID_DATA__REF,
+               /* A reference but the value has been computed. */
+               EXPR_ID_DATA__REF_VALUE,
+               /* A parent is remembered for the recursion check. */
+               EXPR_ID_DATA__PARENT,
+       } kind;
+};
+
 static size_t key_hash(const void *key, void *ctx __maybe_unused)
 {
        const char *str = (const char *)key;
@@ -48,6 +71,7 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
                return -ENOMEM;
 
        data_ptr->parent = ctx->parent;
+       data_ptr->kind = EXPR_ID_DATA__PARENT;
 
        ret = hashmap__set(&ctx->ids, id, data_ptr,
                           (const void **)&old_key, (void **)&old_data);
@@ -69,7 +93,7 @@ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
        if (!data_ptr)
                return -ENOMEM;
        data_ptr->val = val;
-       data_ptr->is_ref = false;
+       data_ptr->kind = EXPR_ID_DATA__VALUE;
 
        ret = hashmap__set(&ctx->ids, id, data_ptr,
                           (const void **)&old_key, (void **)&old_data);
@@ -114,8 +138,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
         */
        data_ptr->ref.metric_name = ref->metric_name;
        data_ptr->ref.metric_expr = ref->metric_expr;
-       data_ptr->ref.counted = false;
-       data_ptr->is_ref = true;
+       data_ptr->kind = EXPR_ID_DATA__REF;
 
        ret = hashmap__set(&ctx->ids, name, data_ptr,
                           (const void **)&old_key, (void **)&old_data);
@@ -148,17 +171,30 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
 
        data = *datap;
 
-       pr_debug2("lookup: is_ref %d, counted %d, val %f: %s\n",
-                 data->is_ref, data->ref.counted, data->val, id);
-
-       if (data->is_ref && !data->ref.counted) {
-               data->ref.counted = true;
+       switch (data->kind) {
+       case EXPR_ID_DATA__VALUE:
+               pr_debug2("lookup(%s): val %f\n", id, data->val);
+               break;
+       case EXPR_ID_DATA__PARENT:
+               pr_debug2("lookup(%s): parent %s\n", id, data->parent->id);
+               break;
+       case EXPR_ID_DATA__REF:
+               pr_debug2("lookup(%s): ref metric name %s\n", id,
+                       data->ref.metric_name);
                pr_debug("processing metric: %s ENTRY\n", id);
-               if (expr__parse(&data->val, ctx, data->ref.metric_expr, 1)) {
+               data->kind = EXPR_ID_DATA__REF_VALUE;
+               if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr, 1)) {
                        pr_debug("%s failed to count\n", id);
                        return -1;
                }
                pr_debug("processing metric: %s EXIT: %f\n", id, data->val);
+               break;
+       case EXPR_ID_DATA__REF_VALUE:
+               pr_debug2("lookup(%s): ref val %f metric name %s\n", id,
+                       data->ref.val, data->ref.metric_name);
+               break;
+       default:
+               assert(0);  /* Unreachable. */
        }
 
        return 0;
@@ -241,3 +277,17 @@ int expr__find_other(const char *expr, const char *one,
 
        return ret;
 }
+
+double expr_id_data__value(const struct expr_id_data *data)
+{
+       if (data->kind == EXPR_ID_DATA__VALUE)
+               return data->val;
+       assert(data->kind == EXPR_ID_DATA__REF_VALUE);
+       return data->ref.val;
+}
+
+struct expr_id *expr_id_data__parent(struct expr_id_data *data)
+{
+       assert(data->kind == EXPR_ID_DATA__PARENT);
+       return data->parent;
+}
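
struct expr_id_data is now a tagged union: EXPR_ID_DATA__VALUE holds a plain number, EXPR_ID_DATA__PARENT only carries the recursion-check parent, and a metric reference starts life as EXPR_ID_DATA__REF and is promoted to EXPR_ID_DATA__REF_VALUE the first time expr__resolve_id() evaluates it. A hedged lifecycle sketch, not kernel code (type, field and function names are taken from the hunks above):

	struct expr_id_data d = { .kind = EXPR_ID_DATA__REF };
	d.ref.metric_expr = "a + b";	/* stored by expr__add_ref(), unevaluated */

	/* first expr__resolve_id(): evaluate once and cache in the union */
	d.kind = EXPR_ID_DATA__REF_VALUE;
	/* ... expr__parse(&d.ref.val, ctx, d.ref.metric_expr, 1) ... */

	/* every later lookup just reads the cached number */
	double v = expr_id_data__value(&d);	/* returns d.ref.val here */
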
index fc2b5e8..dcf8d19 100644 (file)
@@ -23,19 +23,7 @@ struct expr_parse_ctx {
        struct expr_id  *parent;
 };
 
-struct expr_id_data {
-       union {
-               double val;
-               struct {
-                       const char *metric_name;
-                       const char *metric_expr;
-                       bool counted;
-               } ref;
-               struct expr_id  *parent;
-       };
-
-       bool is_ref;
-};
+struct expr_id_data;
 
 struct expr_scanner_ctx {
        int start_token;
@@ -57,4 +45,7 @@ int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
 int expr__find_other(const char *expr, const char *one,
                struct expr_parse_ctx *ids, int runtime);
 
+double expr_id_data__value(const struct expr_id_data *data);
+struct expr_id *expr_id_data__parent(struct expr_id_data *data);
+
 #endif
index d34b370..b2ada8f 100644 (file)
@@ -93,7 +93,7 @@ expr:   NUMBER
                                                YYABORT;
                                        }
 
-                                       $$ = data->val;
+                                       $$ = expr_id_data__value(data);
                                        free($1);
                                }
        | expr '|' expr         { $$ = (long)$1 | (long)$3; }
index be850e9..062383e 100644 (file)
@@ -19,7 +19,9 @@
 #include <sys/utsname.h>
 #include <linux/time64.h>
 #include <dirent.h>
+#ifdef HAVE_LIBBPF_SUPPORT
 #include <bpf/libbpf.h>
+#endif
 #include <perf/cpumap.h>
 
 #include "dso.h"
@@ -987,13 +989,6 @@ out:
        up_read(&env->bpf_progs.lock);
        return ret;
 }
-#else // HAVE_LIBBPF_SUPPORT
-static int write_bpf_prog_info(struct feat_fd *ff __maybe_unused,
-                              struct evlist *evlist __maybe_unused)
-{
-       return 0;
-}
-#endif // HAVE_LIBBPF_SUPPORT
 
 static int write_bpf_btf(struct feat_fd *ff,
                         struct evlist *evlist __maybe_unused)
@@ -1027,6 +1022,7 @@ out:
        up_read(&env->bpf_progs.lock);
        return ret;
 }
+#endif // HAVE_LIBBPF_SUPPORT
 
 static int cpu_cache_level__sort(const void *a, const void *b)
 {
@@ -1638,6 +1634,7 @@ static void print_dir_format(struct feat_fd *ff, FILE *fp)
        fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version);
 }
 
+#ifdef HAVE_LIBBPF_SUPPORT
 static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
 {
        struct perf_env *env = &ff->ph->env;
@@ -1683,6 +1680,7 @@ static void print_bpf_btf(struct feat_fd *ff, FILE *fp)
 
        up_read(&env->bpf_progs.lock);
 }
+#endif // HAVE_LIBBPF_SUPPORT
 
 static void free_event_desc(struct evsel *events)
 {
@@ -2265,8 +2263,7 @@ static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused)
        return 0;
 }
 
-static struct evsel *
-perf_evlist__find_by_index(struct evlist *evlist, int idx)
+static struct evsel *evlist__find_by_index(struct evlist *evlist, int idx)
 {
        struct evsel *evsel;
 
@@ -2278,16 +2275,14 @@ perf_evlist__find_by_index(struct evlist *evlist, int idx)
        return NULL;
 }
 
-static void
-perf_evlist__set_event_name(struct evlist *evlist,
-                           struct evsel *event)
+static void evlist__set_event_name(struct evlist *evlist, struct evsel *event)
 {
        struct evsel *evsel;
 
        if (!event->name)
                return;
 
-       evsel = perf_evlist__find_by_index(evlist, event->idx);
+       evsel = evlist__find_by_index(evlist, event->idx);
        if (!evsel)
                return;
 
@@ -2315,7 +2310,7 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused)
        }
 
        for (evsel = events; evsel->core.attr.size; evsel++)
-               perf_evlist__set_event_name(session->evlist, evsel);
+               evlist__set_event_name(session->evlist, evsel);
 
        if (!session->data->is_pipe)
                free_event_desc(events);
@@ -2938,12 +2933,6 @@ out:
        up_write(&env->bpf_progs.lock);
        return err;
 }
-#else // HAVE_LIBBPF_SUPPORT
-static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data __maybe_unused)
-{
-       return 0;
-}
-#endif // HAVE_LIBBPF_SUPPORT
 
 static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
 {
@@ -2990,6 +2979,7 @@ out:
        free(node);
        return err;
 }
+#endif // HAVE_LIBBPF_SUPPORT
 
 static int process_compressed(struct feat_fd *ff,
                              void *data __maybe_unused)
@@ -3120,8 +3110,10 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
        FEAT_OPR(MEM_TOPOLOGY,  mem_topology,   true),
        FEAT_OPR(CLOCKID,       clockid,        false),
        FEAT_OPN(DIR_FORMAT,    dir_format,     false),
+#ifdef HAVE_LIBBPF_SUPPORT
        FEAT_OPR(BPF_PROG_INFO, bpf_prog_info,  false),
        FEAT_OPR(BPF_BTF,       bpf_btf,        false),
+#endif
        FEAT_OPR(COMPRESSED,    compressed,     false),
        FEAT_OPR(CPU_PMU_CAPS,  cpu_pmu_caps,   false),
        FEAT_OPR(CLOCK_DATA,    clock_data,     false),
@@ -3652,7 +3644,8 @@ static int perf_file_section__process(struct perf_file_section *section,
 }
 
 static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
-                                      struct perf_header *ph, int fd,
+                                      struct perf_header *ph,
+                                      struct perf_data *data,
                                       bool repipe)
 {
        struct feat_fd ff = {
@@ -3661,7 +3654,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
        };
        ssize_t ret;
 
-       ret = readn(fd, header, sizeof(*header));
+       ret = perf_data__read(data, header, sizeof(*header));
        if (ret <= 0)
                return -1;
 
@@ -3684,8 +3677,7 @@ static int perf_header__read_pipe(struct perf_session *session)
        struct perf_header *header = &session->header;
        struct perf_pipe_file_header f_header;
 
-       if (perf_file_header__read_pipe(&f_header, header,
-                                       perf_data__fd(session->data),
+       if (perf_file_header__read_pipe(&f_header, header, session->data,
                                        session->repipe) < 0) {
                pr_debug("incompatible file format\n");
                return -EINVAL;
@@ -3740,8 +3732,7 @@ static int read_attr(int fd, struct perf_header *ph,
        return ret <= 0 ? -1 : 0;
 }
 
-static int perf_evsel__prepare_tracepoint_event(struct evsel *evsel,
-                                               struct tep_handle *pevent)
+static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handle *pevent)
 {
        struct tep_event *event;
        char bf[128];
@@ -3772,14 +3763,13 @@ static int perf_evsel__prepare_tracepoint_event(struct evsel *evsel,
        return 0;
 }
 
-static int perf_evlist__prepare_tracepoint_events(struct evlist *evlist,
-                                                 struct tep_handle *pevent)
+static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_handle *pevent)
 {
        struct evsel *pos;
 
        evlist__for_each_entry(evlist, pos) {
                if (pos->core.attr.type == PERF_TYPE_TRACEPOINT &&
-                   perf_evsel__prepare_tracepoint_event(pos, pevent))
+                   evsel__prepare_tracepoint_event(pos, pevent))
                        return -1;
        }
 
@@ -3888,8 +3878,7 @@ int perf_session__read_header(struct perf_session *session)
        perf_header__process_sections(header, fd, &session->tevent,
                                      perf_file_section__process);
 
-       if (perf_evlist__prepare_tracepoint_events(session->evlist,
-                                                  session->tevent.pevent))
+       if (evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent))
                goto out_delete_evlist;
 
        return 0;
@@ -4037,7 +4026,7 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
 
        evlist = *pevlist;
 
-       evsel = perf_evlist__id2evsel(evlist, ev->id);
+       evsel = evlist__id2evsel(evlist, ev->id);
        if (evsel == NULL)
                return -EINVAL;
 
@@ -4110,8 +4099,7 @@ int perf_event__process_tracing_data(struct perf_session *session,
                return -1;
        }
 
-       perf_evlist__prepare_tracepoint_events(session->evlist,
-                                              session->tevent.pevent);
+       evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent);
 
        return size_read + padding;
 }
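
Routing the pipe-header read through struct perf_data rather than a bare fd keeps perf_file_header__read_pipe() ignorant of how the stream is backed. A hedged sketch of the helper it now calls; only the readn() branch is implied by this diff, and the buffered use_stdio/fptr path is an assumption about where the abstraction could go:

	ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size)
	{
		if (data->use_stdio)	/* hypothetical buffered-pipe mode */
			return fread(buf, size, 1, data->file.fptr) == 1 ?
			       (ssize_t)size : -1;
		return readn(data->file.fd, buf, size);	/* plain fd path */
	}
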
index 8a793e4..a08fb9e 100644 (file)
@@ -188,6 +188,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
                hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
                                   unresolved_col_width + 4 + 2);
 
+               hists__new_col_len(hists, HISTC_MEM_DATA_PAGE_SIZE,
+                                  unresolved_col_width + 4 + 2);
+
        } else {
                symlen = unresolved_col_width + 4 + 2;
                hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
@@ -2654,7 +2657,7 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
        }
 }
 
-size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp)
+size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp)
 {
        struct evsel *pos;
        size_t ret = 0;
index 96b1c13..14f6633 100644 (file)
@@ -56,6 +56,7 @@ enum hist_column {
        HISTC_MEM_DADDR_SYMBOL,
        HISTC_MEM_DADDR_DSO,
        HISTC_MEM_PHYS_DADDR,
+       HISTC_MEM_DATA_PAGE_SIZE,
        HISTC_MEM_LOCKED,
        HISTC_MEM_TLB,
        HISTC_MEM_LVL,
@@ -196,7 +197,7 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered);
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
                      int max_cols, float min_pcnt, FILE *fp,
                      bool ignore_callchains);
-size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp);
+size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp);
 
 void hists__filter_by_dso(struct hists *hists);
 void hists__filter_by_thread(struct hists *hists);
@@ -464,12 +465,9 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel,
                             struct hist_browser_timer *hbt,
                             struct annotation_options *annotation_opts);
 
-int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
-                                 struct hist_browser_timer *hbt,
-                                 float min_pcnt,
-                                 struct perf_env *env,
-                                 bool warn_lost_event,
-                                 struct annotation_options *annotation_options);
+int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt,
+                            float min_pcnt, struct perf_env *env, bool warn_lost_event,
+                            struct annotation_options *annotation_options);
 
 int script_browse(const char *script_opt, struct evsel *evsel);
 
@@ -483,13 +481,13 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
                           struct annotation_options *annotation_opts);
 #else
 static inline
-int perf_evlist__tui_browse_hists(struct evlist *evlist __maybe_unused,
-                                 const char *help __maybe_unused,
-                                 struct hist_browser_timer *hbt __maybe_unused,
-                                 float min_pcnt __maybe_unused,
-                                 struct perf_env *env __maybe_unused,
-                                 bool warn_lost_event __maybe_unused,
-                                 struct annotation_options *annotation_options __maybe_unused)
+int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused,
+                            const char *help __maybe_unused,
+                            struct hist_browser_timer *hbt __maybe_unused,
+                            float min_pcnt __maybe_unused,
+                            struct perf_env *env __maybe_unused,
+                            bool warn_lost_event __maybe_unused,
+                            struct annotation_options *annotation_options __maybe_unused)
 {
        return 0;
 }
index 3a0348c..60214de 100644 (file)
@@ -2520,11 +2520,10 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
 static int intel_pt_process_switch(struct intel_pt *pt,
                                   struct perf_sample *sample)
 {
-       struct evsel *evsel;
        pid_t tid;
        int cpu, ret;
+       struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id);
 
-       evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
        if (evsel != pt->switch_evsel)
                return 0;
 
index 15385ea..f841f35 100644 (file)
@@ -1581,32 +1581,25 @@ static bool machine__uses_kcore(struct machine *machine)
 }
 
 static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
-                                            union perf_event *event)
+                                            struct extra_kernel_map *xm)
 {
        return machine__is(machine, "x86_64") &&
-              is_entry_trampoline(event->mmap.filename);
+              is_entry_trampoline(xm->name);
 }
 
 static int machine__process_extra_kernel_map(struct machine *machine,
-                                            union perf_event *event)
+                                            struct extra_kernel_map *xm)
 {
        struct dso *kernel = machine__kernel_dso(machine);
-       struct extra_kernel_map xm = {
-               .start = event->mmap.start,
-               .end   = event->mmap.start + event->mmap.len,
-               .pgoff = event->mmap.pgoff,
-       };
 
        if (kernel == NULL)
                return -1;
 
-       strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
-
-       return machine__create_extra_kernel_map(machine, kernel, &xm);
+       return machine__create_extra_kernel_map(machine, kernel, xm);
 }
 
 static int machine__process_kernel_mmap_event(struct machine *machine,
-                                             union perf_event *event)
+                                             struct extra_kernel_map *xm)
 {
        struct map *map;
        enum dso_space_type dso_space;
@@ -1621,20 +1614,18 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
        else
                dso_space = DSO_SPACE__KERNEL_GUEST;
 
-       is_kernel_mmap = memcmp(event->mmap.filename,
-                               machine->mmap_name,
+       is_kernel_mmap = memcmp(xm->name, machine->mmap_name,
                                strlen(machine->mmap_name) - 1) == 0;
-       if (event->mmap.filename[0] == '/' ||
-           (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
-               map = machine__addnew_module_map(machine, event->mmap.start,
-                                                event->mmap.filename);
+       if (xm->name[0] == '/' ||
+           (!is_kernel_mmap && xm->name[0] == '[')) {
+               map = machine__addnew_module_map(machine, xm->start,
+                                                xm->name);
                if (map == NULL)
                        goto out_problem;
 
-               map->end = map->start + event->mmap.len;
+               map->end = map->start + xm->end - xm->start;
        } else if (is_kernel_mmap) {
-               const char *symbol_name = (event->mmap.filename +
-                               strlen(machine->mmap_name));
+               const char *symbol_name = (xm->name + strlen(machine->mmap_name));
                /*
                 * Should be there already, from the build-id table in
                 * the header.
@@ -1688,18 +1679,17 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
                if (strstr(kernel->long_name, "vmlinux"))
                        dso__set_short_name(kernel, "[kernel.vmlinux]", false);
 
-               machine__update_kernel_mmap(machine, event->mmap.start,
-                                        event->mmap.start + event->mmap.len);
+               machine__update_kernel_mmap(machine, xm->start, xm->end);
 
                /*
                 * Avoid using a zero address (kptr_restrict) for the ref reloc
                 * symbol. Effectively having zero here means that at record
                 * time /proc/sys/kernel/kptr_restrict was non zero.
                 */
-               if (event->mmap.pgoff != 0) {
+               if (xm->pgoff != 0) {
                        map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map,
                                                        symbol_name,
-                                                       event->mmap.pgoff);
+                                                       xm->pgoff);
                }
 
                if (machine__is_default_guest(machine)) {
@@ -1708,8 +1698,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
                         */
                        dso__load(kernel, machine__kernel_map(machine));
                }
-       } else if (perf_event__is_extra_kernel_mmap(machine, event)) {
-               return machine__process_extra_kernel_map(machine, event);
+       } else if (perf_event__is_extra_kernel_mmap(machine, xm)) {
+               return machine__process_extra_kernel_map(machine, xm);
        }
        return 0;
 out_problem:
@@ -1735,7 +1725,14 @@ int machine__process_mmap2_event(struct machine *machine,
 
        if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
            sample->cpumode == PERF_RECORD_MISC_KERNEL) {
-               ret = machine__process_kernel_mmap_event(machine, event);
+               struct extra_kernel_map xm = {
+                       .start = event->mmap2.start,
+                       .end   = event->mmap2.start + event->mmap2.len,
+                       .pgoff = event->mmap2.pgoff,
+               };
+
+               strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN);
+               ret = machine__process_kernel_mmap_event(machine, &xm);
                if (ret < 0)
                        goto out_problem;
                return 0;
@@ -1785,7 +1782,14 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 
        if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
            sample->cpumode == PERF_RECORD_MISC_KERNEL) {
-               ret = machine__process_kernel_mmap_event(machine, event);
+               struct extra_kernel_map xm = {
+                       .start = event->mmap.start,
+                       .end   = event->mmap.start + event->mmap.len,
+                       .pgoff = event->mmap.pgoff,
+               };
+
+               strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
+               ret = machine__process_kernel_mmap_event(machine, &xm);
                if (ret < 0)
                        goto out_problem;
                return 0;
@@ -2019,11 +2023,12 @@ static void ip__resolve_ams(struct thread *thread,
        ams->ms.sym = al.sym;
        ams->ms.map = al.map;
        ams->phys_addr = 0;
+       ams->data_page_size = 0;
 }
 
 static void ip__resolve_data(struct thread *thread,
                             u8 m, struct addr_map_symbol *ams,
-                            u64 addr, u64 phys_addr)
+                            u64 addr, u64 phys_addr, u64 daddr_page_size)
 {
        struct addr_location al;
 
@@ -2037,6 +2042,7 @@ static void ip__resolve_data(struct thread *thread,
        ams->ms.sym = al.sym;
        ams->ms.map = al.map;
        ams->phys_addr = phys_addr;
+       ams->data_page_size = daddr_page_size;
 }
 
 struct mem_info *sample__resolve_mem(struct perf_sample *sample,
@@ -2049,7 +2055,8 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 
        ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
        ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
-                        sample->addr, sample->phys_addr);
+                        sample->addr, sample->phys_addr,
+                        sample->data_page_size);
        mi->data_src.val = sample->data_src;
 
        return mi;
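
Both the MMAP and MMAP2 handlers now normalize the kernel-space case into a struct extra_kernel_map before calling machine__process_kernel_mmap_event(), so that function no longer cares which record layout the event arrived in. For reference, the struct (declared elsewhere in machine.h; fields inferred from the usage above) is essentially:

	struct extra_kernel_map {
		u64  start;
		u64  end;
		u64  pgoff;
		char name[KMAP_NAME_LEN];
	};
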
index 5b8ca93..7d22ade 100644 (file)
@@ -19,5 +19,6 @@ struct addr_map_symbol {
        u64           addr;
        u64           al_addr;
        u64           phys_addr;
+       u64           data_page_size;
 };
 #endif // __PERF_MAP_SYMBOL
index ea0af0b..19007e4 100644 (file)
@@ -17,9 +17,10 @@ unsigned int perf_mem_events__loads_ldlat = 30;
 
 #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
 
-struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
-       E("ldlat-loads",        "cpu/mem-loads,ldlat=%u/P",     "mem-loads"),
-       E("ldlat-stores",       "cpu/mem-stores/P",             "mem-stores"),
+static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+       E("ldlat-loads",        "cpu/mem-loads,ldlat=%u/P",     "cpu/events/mem-loads"),
+       E("ldlat-stores",       "cpu/mem-stores/P",             "cpu/events/mem-stores"),
+       E(NULL,                 NULL,                           NULL),
 };
 #undef E
 
@@ -28,19 +29,31 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
 static char mem_loads_name[100];
 static bool mem_loads_name__init;
 
+struct perf_mem_event * __weak perf_mem_events__ptr(int i)
+{
+       if (i >= PERF_MEM_EVENTS__MAX)
+               return NULL;
+
+       return &perf_mem_events[i];
+}
+
 char * __weak perf_mem_events__name(int i)
 {
+       struct perf_mem_event *e = perf_mem_events__ptr(i);
+
+       if (!e)
+               return NULL;
+
        if (i == PERF_MEM_EVENTS__LOAD) {
                if (!mem_loads_name__init) {
                        mem_loads_name__init = true;
                        scnprintf(mem_loads_name, sizeof(mem_loads_name),
-                                 perf_mem_events[i].name,
-                                 perf_mem_events__loads_ldlat);
+                                 e->name, perf_mem_events__loads_ldlat);
                }
                return mem_loads_name;
        }
 
-       return (char *)perf_mem_events[i].name;
+       return (char *)e->name;
 }
 
 int perf_mem_events__parse(const char *str)
@@ -61,7 +74,10 @@ int perf_mem_events__parse(const char *str)
 
        while (tok) {
                for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-                       struct perf_mem_event *e = &perf_mem_events[j];
+                       struct perf_mem_event *e = perf_mem_events__ptr(j);
+
+                       if (!e->tag)
+                               continue;
 
                        if (strstr(e->tag, tok))
                                e->record = found = true;
@@ -90,10 +106,17 @@ int perf_mem_events__init(void)
 
        for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
                char path[PATH_MAX];
-               struct perf_mem_event *e = &perf_mem_events[j];
+               struct perf_mem_event *e = perf_mem_events__ptr(j);
                struct stat st;
 
-               scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s",
+               /*
+                * If the event entry isn't valid, skip initialization
+                * and "e->supported" will keep false.
+                */
+               if (!e->tag)
+                       continue;
+
+               scnprintf(path, PATH_MAX, "%s/devices/%s",
                          mnt, e->sysfs_name);
 
                if (!stat(path, &st))
@@ -108,10 +131,10 @@ void perf_mem_events__list(void)
        int j;
 
        for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-               struct perf_mem_event *e = &perf_mem_events[j];
+               struct perf_mem_event *e = perf_mem_events__ptr(j);
 
                fprintf(stderr, "%-13s%-*s%s\n",
-                       e->tag,
+                       e->tag ?: "",
                        verbose > 0 ? 25 : 0,
                        verbose > 0 ? perf_mem_events__name(j) : "",
                        e->supported ? ": available" : "");
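
With the table now static, every access funnels through the __weak perf_mem_events__ptr() hook, and callers must tolerate both a NULL return and an unset slot (tag == NULL). A hedged sketch of the iteration idiom the hunks above converge on:

	for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
		struct perf_mem_event *e = perf_mem_events__ptr(j);

		if (!e || !e->tag)	/* out of range, or slot left unset */
			continue;
		/* ... use e->name, e->sysfs_name, e->supported ... */
	}
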
index 904dad3..5ef1782 100644 (file)
@@ -28,16 +28,17 @@ struct mem_info {
 enum {
        PERF_MEM_EVENTS__LOAD,
        PERF_MEM_EVENTS__STORE,
+       PERF_MEM_EVENTS__LOAD_STORE,
        PERF_MEM_EVENTS__MAX,
 };
 
-extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
 extern unsigned int perf_mem_events__loads_ldlat;
 
 int perf_mem_events__parse(const char *str);
 int perf_mem_events__init(void);
 
 char *perf_mem_events__name(int i);
+struct perf_mem_event *perf_mem_events__ptr(int i);
 
 void perf_mem_events__list(void);
 
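
The new PERF_MEM_EVENTS__LOAD_STORE slot is for architectures whose PMU exposes only a combined load/store sampling event; such an architecture can override the __weak hook and point it at its own table. A hedged sketch of an override (the table name and event strings are hypothetical; only the hook's shape comes from the diff):

	static struct perf_mem_event perf_mem_events_arch[PERF_MEM_EVENTS__MAX] = {
		{ .tag = NULL },	/* no standalone loads */
		{ .tag = NULL },	/* no standalone stores */
		{ .tag = "mem-ldst", .name = "arm_spe_0//",
		  .sysfs_name = "arm_spe_0" },	/* hypothetical */
	};

	struct perf_mem_event *perf_mem_events__ptr(int i)
	{
		if (i >= PERF_MEM_EVENTS__MAX)
			return NULL;
		return &perf_mem_events_arch[i];
	}
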
index c84f584..03a7d7b 100644 (file)
@@ -96,7 +96,8 @@ int mem2node__init(struct mem2node *map, struct perf_env *env)
 
        /* Cut unused entries, due to merging. */
        tmp_entries = realloc(entries, sizeof(*entries) * j);
-       if (tmp_entries || WARN_ON_ONCE(j == 0))
+       if (tmp_entries ||
+           WARN_ONCE(j == 0, "No memory nodes, is CONFIG_MEMORY_HOTPLUG enabled?\n"))
                entries = tmp_entries;
 
        for (i = 0; i < j; i++) {
index 060454a..ee94d3e 100644 (file)
@@ -279,7 +279,9 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
                         * when the group is left.
                         */
                        if (!has_constraint &&
-                           ev->leader != metric_events[i]->leader)
+                           ev->leader != metric_events[i]->leader &&
+                           !strcmp(ev->leader->pmu_name,
+                                   metric_events[i]->leader->pmu_name))
                                break;
                        if (!strcmp(metric_events[i]->name, ev->name)) {
                                set_bit(ev->idx, evlist_used);
@@ -413,6 +415,12 @@ static bool match_metric(const char *n, const char *list)
        return false;
 }
 
+static bool match_pe_metric(struct pmu_event *pe, const char *metric)
+{
+       return match_metric(pe->metric_group, metric) ||
+              match_metric(pe->metric_name, metric);
+}
+
 struct mep {
        struct rb_node nd;
        const char *name;
@@ -491,6 +499,115 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
                putchar('\n');
 }
 
+static int metricgroup__print_pmu_event(struct pmu_event *pe,
+                                       bool metricgroups, char *filter,
+                                       bool raw, bool details,
+                                       struct rblist *groups,
+                                       struct strlist *metriclist)
+{
+       const char *g;
+       char *omg, *mg;
+
+       g = pe->metric_group;
+       if (!g && pe->metric_name) {
+               if (pe->name)
+                       return 0;
+               g = "No_group";
+       }
+
+       if (!g)
+               return 0;
+
+       mg = strdup(g);
+
+       if (!mg)
+               return -ENOMEM;
+       omg = mg;
+       while ((g = strsep(&mg, ";")) != NULL) {
+               struct mep *me;
+               char *s;
+
+               g = skip_spaces(g);
+               if (*g == 0)
+                       g = "No_group";
+               if (filter && !strstr(g, filter))
+                       continue;
+               if (raw)
+                       s = (char *)pe->metric_name;
+               else {
+                       if (asprintf(&s, "%s\n%*s%s]",
+                                    pe->metric_name, 8, "[", pe->desc) < 0)
+                               return -1;
+                       if (details) {
+                               if (asprintf(&s, "%s\n%*s%s]",
+                                            s, 8, "[", pe->metric_expr) < 0)
+                                       return -1;
+                       }
+               }
+
+               if (!s)
+                       continue;
+
+               if (!metricgroups) {
+                       strlist__add(metriclist, s);
+               } else {
+                       me = mep_lookup(groups, g);
+                       if (!me)
+                               continue;
+                       strlist__add(me->metrics, s);
+               }
+
+               if (!raw)
+                       free(s);
+       }
+       free(omg);
+
+       return 0;
+}
+
+struct metricgroup_print_sys_idata {
+       struct strlist *metriclist;
+       char *filter;
+       struct rblist *groups;
+       bool metricgroups;
+       bool raw;
+       bool details;
+};
+
+typedef int (*metricgroup_sys_event_iter_fn)(struct pmu_event *pe, void *);
+
+struct metricgroup_iter_data {
+       metricgroup_sys_event_iter_fn fn;
+       void *data;
+};
+
+static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data)
+{
+       struct metricgroup_iter_data *d = data;
+       struct perf_pmu *pmu = NULL;
+
+       if (!pe->metric_expr || !pe->compat)
+               return 0;
+
+       while ((pmu = perf_pmu__scan(pmu))) {
+
+               if (!pmu->id || strcmp(pmu->id, pe->compat))
+                       continue;
+
+               return d->fn(pe, d->data);
+       }
+
+       return 0;
+}
+
+static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data)
+{
+       struct metricgroup_print_sys_idata *d = data;
+
+       return metricgroup__print_pmu_event(pe, d->metricgroups, d->filter, d->raw,
+                                    d->details, d->groups, d->metriclist);
+}
+
 void metricgroup__print(bool metrics, bool metricgroups, char *filter,
                        bool raw, bool details)
 {
@@ -501,9 +618,6 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
        struct rb_node *node, *next;
        struct strlist *metriclist = NULL;
 
-       if (!map)
-               return;
-
        if (!metricgroups) {
                metriclist = strlist__new(NULL, NULL);
                if (!metriclist)
@@ -514,67 +628,33 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
        groups.node_new = mep_new;
        groups.node_cmp = mep_cmp;
        groups.node_delete = mep_delete;
-       for (i = 0; ; i++) {
-               const char *g;
+       for (i = 0; map; i++) {
                pe = &map->table[i];
 
                if (!pe->name && !pe->metric_group && !pe->metric_name)
                        break;
                if (!pe->metric_expr)
                        continue;
-               g = pe->metric_group;
-               if (!g && pe->metric_name) {
-                       if (pe->name)
-                               continue;
-                       g = "No_group";
-               }
-               if (g) {
-                       char *omg;
-                       char *mg = strdup(g);
-
-                       if (!mg)
-                               return;
-                       omg = mg;
-                       while ((g = strsep(&mg, ";")) != NULL) {
-                               struct mep *me;
-                               char *s;
-
-                               g = skip_spaces(g);
-                               if (*g == 0)
-                                       g = "No_group";
-                               if (filter && !strstr(g, filter))
-                                       continue;
-                               if (raw)
-                                       s = (char *)pe->metric_name;
-                               else {
-                                       if (asprintf(&s, "%s\n%*s%s]",
-                                                    pe->metric_name, 8, "[", pe->desc) < 0)
-                                               return;
-
-                                       if (details) {
-                                               if (asprintf(&s, "%s\n%*s%s]",
-                                                            s, 8, "[", pe->metric_expr) < 0)
-                                                       return;
-                                       }
-                               }
-
-                               if (!s)
-                                       continue;
+               if (metricgroup__print_pmu_event(pe, metricgroups, filter,
+                                                raw, details, &groups,
+                                                metriclist) < 0)
+                       return;
+       }
 
-                               if (!metricgroups) {
-                                       strlist__add(metriclist, s);
-                               } else {
-                                       me = mep_lookup(&groups, g);
-                                       if (!me)
-                                               continue;
-                                       strlist__add(me->metrics, s);
-                               }
+       {
+               struct metricgroup_iter_data data = {
+                       .fn = metricgroup__print_sys_event_iter,
+                       .data = (void *) &(struct metricgroup_print_sys_idata){
+                               .metriclist = metriclist,
+                               .metricgroups = metricgroups,
+                               .filter = filter,
+                               .raw = raw,
+                               .details = details,
+                               .groups = &groups,
+                       },
+               };
 
-                               if (!raw)
-                                       free(s);
-                       }
-                       free(omg);
-               }
+               pmu_for_each_sys_event(metricgroup__sys_event_iter, &data);
        }
 
        if (!filter || !rblist__empty(&groups)) {
@@ -683,6 +763,16 @@ int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused)
        return 1;
 }
 
+struct metricgroup_add_iter_data {
+       struct list_head *metric_list;
+       const char *metric;
+       struct metric **m;
+       struct expr_ids *ids;
+       int *ret;
+       bool *has_match;
+       bool metric_no_group;
+};
+
 static int __add_metric(struct list_head *metric_list,
                        struct pmu_event *pe,
                        bool metric_no_group,
@@ -792,10 +882,11 @@ static int __add_metric(struct list_head *metric_list,
        return 0;
 }
 
-#define map_for_each_event(__pe, __idx, __map)                         \
-       for (__idx = 0, __pe = &__map->table[__idx];                    \
-            __pe->name || __pe->metric_group || __pe->metric_name;     \
-            __pe = &__map->table[++__idx])
+#define map_for_each_event(__pe, __idx, __map)                                 \
+       if (__map)                                                              \
+               for (__idx = 0, __pe = &__map->table[__idx];                    \
+                    __pe->name || __pe->metric_group || __pe->metric_name;     \
+                    __pe = &__map->table[++__idx])
 
 #define map_for_each_metric(__pe, __idx, __map, __metric)              \
        map_for_each_event(__pe, __idx, __map)                          \
@@ -833,7 +924,7 @@ static int recursion_check(struct metric *m, const char *id, struct expr_id **pa
        if (ret)
                return ret;
 
-       p = data->parent;
+       p = expr_id_data__parent(data);
 
        while (p->parent) {
                if (!strcmp(p->id, id)) {
@@ -854,7 +945,7 @@ static int recursion_check(struct metric *m, const char *id, struct expr_id **pa
        }
 
        p->id     = strdup(id);
-       p->parent = data->parent;
+       p->parent = expr_id_data__parent(data);
        *parent   = p;
 
        return p->id ? 0 : -ENOMEM;
@@ -963,6 +1054,29 @@ static int add_metric(struct list_head *metric_list,
        return ret;
 }
 
+static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe,
+                                                 void *data)
+{
+       struct metricgroup_add_iter_data *d = data;
+       int ret;
+
+       if (!match_pe_metric(pe, d->metric))
+               return 0;
+
+       ret = add_metric(d->metric_list, pe, d->metric_no_group, d->m, NULL, d->ids);
+       if (ret)
+               return ret;
+
+       ret = resolve_metric(d->metric_no_group,
+                                    d->metric_list, NULL, d->ids);
+       if (ret)
+               return ret;
+
+       *(d->has_match) = true;
+
+       return *d->ret;
+}
+
 static int metricgroup__add_metric(const char *metric, bool metric_no_group,
                                   struct strbuf *events,
                                   struct list_head *metric_list,
@@ -993,6 +1107,22 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group,
                        goto out;
        }
 
+       {
+               struct metricgroup_iter_data data = {
+                       .fn = metricgroup__add_metric_sys_event_iter,
+                       .data = (void *) &(struct metricgroup_add_iter_data) {
+                               .metric_list = &list,
+                               .metric = metric,
+                               .metric_no_group = metric_no_group,
+                               .m = &m,
+                               .ids = &ids,
+                               .has_match = &has_match,
+                               .ret = &ret,
+                       },
+               };
+
+               pmu_for_each_sys_event(metricgroup__sys_event_iter, &data);
+       }
        /* End of pmu events. */
        if (!has_match) {
                ret = -EINVAL;
@@ -1119,8 +1249,6 @@ int metricgroup__parse_groups(const struct option *opt,
        struct evlist *perf_evlist = *(struct evlist **)opt->value;
        struct pmu_events_map *map = perf_pmu__find_map(NULL);
 
-       if (!map)
-               return 0;
 
        return parse_groups(perf_evlist, str, metric_no_group,
                            metric_no_merge, NULL, metric_events, map);
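
Both new call sites hand pmu_for_each_sys_event() a struct metricgroup_iter_data whose fn/data pair forwards to the real worker. The implied contract: the callback sees every event in every system PMU table, and a non-zero return stops the walk of the current table. A hedged minimal callback under that contract:

	static int count_sys_metrics(struct pmu_event *pe, void *data)
	{
		int *n = data;

		if (pe->metric_name)	/* count only metric entries */
			(*n)++;
		return 0;		/* keep iterating */
	}

	/* usage: */
	int n = 0;
	pmu_for_each_sys_event(count_sys_metrics, &n);
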
index 3b27358..42c84ad 100644 (file)
@@ -668,6 +668,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
        return ret;
 }
 
+#ifdef HAVE_LIBBPF_SUPPORT
 struct __add_bpf_event_param {
        struct parse_events_state *parse_state;
        struct list_head *list;
@@ -900,6 +901,30 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
                list_splice_tail(&obj_head_config, head_config);
        return err;
 }
+#else // HAVE_LIBBPF_SUPPORT
+int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
+                             struct list_head *list __maybe_unused,
+                             struct bpf_object *obj __maybe_unused,
+                             struct list_head *head_config __maybe_unused)
+{
+       parse_events__handle_error(parse_state->error, 0,
+                                  strdup("BPF support is not compiled"),
+                                  strdup("Make sure libbpf-devel is available at build time."));
+       return -ENOTSUP;
+}
+
+int parse_events_load_bpf(struct parse_events_state *parse_state,
+                         struct list_head *list __maybe_unused,
+                         char *bpf_file_name __maybe_unused,
+                         bool source __maybe_unused,
+                         struct list_head *head_config __maybe_unused)
+{
+       parse_events__handle_error(parse_state->error, 0,
+                                  strdup("BPF support is not compiled"),
+                                  strdup("Make sure libbpf-devel is available at build time."));
+       return -ENOTSUP;
+}
+#endif // HAVE_LIBBPF_SUPPORT
 
 static int
 parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
@@ -1744,7 +1769,7 @@ void parse_events__set_leader(char *name, struct list_head *list,
        if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
                return;
 
-       __perf_evlist__set_leader(list);
+       __evlist__set_leader(list);
        leader = list_entry(list->next, struct evsel, core.node);
        leader->group_name = name ? strdup(name) : NULL;
 }
@@ -2158,7 +2183,7 @@ int __parse_events(struct evlist *evlist, const char *str,
        /*
         * Add list to the evlist even with errors to allow callers to clean up.
         */
-       perf_evlist__splice_list_tail(evlist, &parse_state.list);
+       evlist__splice_list_tail(evlist, &parse_state.list);
 
        if (!ret) {
                struct evsel *last;
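
The !HAVE_LIBBPF_SUPPORT stubs turn what used to be a link-time hole into a clean parse-time failure. A hedged sketch of what a caller sees (the error strings are verbatim from the hunk above; the caller-side handling is illustrative):

	struct parse_events_error err = { 0 };
	struct parse_events_state parse_state = { .error = &err };
	LIST_HEAD(list);

	if (parse_events_load_bpf(&parse_state, &list, "prog.c", true, NULL) == -ENOTSUP)
		pr_err("%s (%s)\n", err.str, err.help);
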
index e687497..a4a1004 100644 (file)
@@ -54,7 +54,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
 #endif
                                fputc('\n', stderr);
                                /* just printing available regs */
-                               return -1;
+                               goto error;
                        }
 #ifdef HAVE_PERF_REGS_SUPPORT
                        for (r = sample_reg_masks; r->name; r++) {
index e67a227..fb0bb66 100644 (file)
@@ -35,7 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
                bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
                bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
                bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX),
-               bit_name(CGROUP),
+               bit_name(CGROUP), bit_name(DATA_PAGE_SIZE),
                { .name = NULL, }
        };
 #undef bit_name
index d41caeb..44ef283 100644 (file)
@@ -597,6 +597,7 @@ static struct perf_cpu_map *__pmu_cpumask(const char *path)
  * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
  * may have a "cpus" file.
  */
+#define SYS_TEMPLATE_ID        "./bus/event_source/devices/%s/identifier"
 #define CPUS_TEMPLATE_UNCORE   "%s/bus/event_source/devices/%s/cpumask"
 #define CPUS_TEMPLATE_CPU      "%s/bus/event_source/devices/%s/cpus"
 
@@ -635,6 +636,21 @@ static bool pmu_is_uncore(const char *name)
        return file_available(path);
 }
 
+static char *pmu_id(const char *name)
+{
+       char path[PATH_MAX], *str;
+       size_t len;
+
+       snprintf(path, PATH_MAX, SYS_TEMPLATE_ID, name);
+
+       if (sysfs__read_str(path, &str, &len) < 0)
+               return NULL;
+
+       str[len - 1] = 0; /* remove line feed */
+
+       return str;
+}
+
 /*
  *  PMU CORE devices have different name other than cpu in sysfs on some
  *  platforms.
@@ -796,6 +812,83 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
        pmu_add_cpu_aliases_map(head, pmu, map);
 }
 
+void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data)
+{
+       int i = 0;
+
+       while (1) {
+               struct pmu_sys_events *event_table;
+               int j = 0;
+
+               event_table = &pmu_sys_event_tables[i++];
+
+               if (!event_table->table)
+                       break;
+
+               while (1) {
+                       struct pmu_event *pe = &event_table->table[j++];
+                       int ret;
+
+                       if (!pe->name && !pe->metric_group && !pe->metric_name)
+                               break;
+
+                       ret = fn(pe, data);
+                       if (ret)
+                               break;
+               }
+       }
+}
+
+struct pmu_sys_event_iter_data {
+       struct list_head *head;
+       struct perf_pmu *pmu;
+};
+
+static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data)
+{
+       struct pmu_sys_event_iter_data *idata = data;
+       struct perf_pmu *pmu = idata->pmu;
+
+       if (!pe->name) {
+               if (pe->metric_group || pe->metric_name)
+                       return 0;
+               return -EINVAL;
+       }
+
+       if (!pe->compat || !pe->pmu)
+               return 0;
+
+       if (!strcmp(pmu->id, pe->compat) &&
+           pmu_uncore_alias_match(pe->pmu, pmu->name)) {
+               __perf_pmu__new_alias(idata->head, NULL,
+                                     (char *)pe->name,
+                                     (char *)pe->desc,
+                                     (char *)pe->event,
+                                     (char *)pe->long_desc,
+                                     (char *)pe->topic,
+                                     (char *)pe->unit,
+                                     (char *)pe->perpkg,
+                                     (char *)pe->metric_expr,
+                                     (char *)pe->metric_name,
+                                     (char *)pe->deprecated);
+       }
+
+       return 0;
+}
+
+static void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu)
+{
+       struct pmu_sys_event_iter_data idata = {
+               .head = head,
+               .pmu = pmu,
+       };
+
+       if (!pmu->id)
+               return;
+
+       pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, &idata);
+}
+
 struct perf_event_attr * __weak
 perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
 {
@@ -847,8 +940,11 @@ static struct perf_pmu *pmu_lookup(const char *name)
        pmu->name = strdup(name);
        pmu->type = type;
        pmu->is_uncore = pmu_is_uncore(name);
+       if (pmu->is_uncore)
+               pmu->id = pmu_id(name);
        pmu->max_precise = pmu_max_precise(name);
        pmu_add_cpu_aliases(&aliases, pmu);
+       pmu_add_sys_aliases(&aliases, pmu);
 
        INIT_LIST_HEAD(&pmu->format);
        INIT_LIST_HEAD(&pmu->aliases);
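
pmu_for_each_sys_event() above walks sentinel-terminated event tables and lets the callback stop the walk early with a non-zero return. A reduced sketch of the same callback-iterator shape (struct event and the table contents are illustrative):

#include <stdio.h>

struct event { const char *name; };

typedef int (*event_iter_fn)(const struct event *e, void *data);

/* Walk a NULL-terminated table, stopping early if the callback asks to. */
static void for_each_event(const struct event *table, event_iter_fn fn, void *data)
{
        for (; table->name; table++)
                if (fn(table, data))
                        break;
}

static int print_event(const struct event *e, void *data)
{
        int *count = data;

        printf("%s\n", e->name);
        return ++(*count) >= 2;         /* non-zero return stops the walk */
}

int main(void)
{
        static const struct event table[] = {
                { "cycles" }, { "instructions" }, { "branches" }, { NULL },
        };
        int count = 0;

        for_each_event(table, print_event, &count);
        return 0;
}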
index a64e9c9..8164388 100644 (file)
@@ -30,6 +30,7 @@ struct perf_pmu_caps {
 
 struct perf_pmu {
        char *name;
+       char *id;
        __u32 type;
        bool selectable;
        bool is_uncore;
@@ -116,6 +117,8 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
 bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
 void perf_pmu_free_alias(struct perf_pmu_alias *alias);
 
+typedef int (*pmu_sys_event_iter_fn)(struct pmu_event *pe, void *data);
+void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data);
 int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
 
 int perf_pmu__caps_parse(struct perf_pmu *pmu);
index ae8edde..cc5ade8 100644 (file)
@@ -1055,7 +1055,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
                if (pyevent == NULL)
                        return PyErr_NoMemory();
 
-               evsel = perf_evlist__event2evsel(evlist, event);
+               evsel = evlist__event2evsel(evlist, event);
                if (!evsel) {
                        Py_INCREF(Py_None);
                        return Py_None;
@@ -1089,7 +1089,7 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
                return NULL;
 
        if (group)
-               perf_evlist__set_leader(evlist);
+               evlist__set_leader(evlist);
 
        if (evlist__open(evlist) < 0) {
                PyErr_SetFromErrno(PyExc_OSError);
index 07e4b96..e70c9dd 100644 (file)
@@ -89,8 +89,7 @@ static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *ev
                            leader->core.attr.sample_type;
 }
 
-void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
-                        struct callchain_param *callchain)
+void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain)
 {
        struct evsel *evsel;
        bool use_sample_identifier = false;
@@ -102,7 +101,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
         * since some might depend on this info.
         */
        if (opts->group)
-               perf_evlist__set_leader(evlist);
+               evlist__set_leader(evlist);
 
        if (evlist->core.cpus->map[0] < 0)
                opts->no_inherit = true;
@@ -144,7 +143,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
                        evsel__set_sample_id(evsel, use_sample_identifier);
        }
 
-       perf_evlist__set_id_pos(evlist);
+       evlist__set_id_pos(evlist);
 }
 
 static int get_max_rate(unsigned int *rate)
@@ -217,7 +216,7 @@ int record_opts__config(struct record_opts *opts)
        return record_opts__config_freq(opts);
 }
 
-bool perf_evlist__can_select_event(struct evlist *evlist, const char *str)
+bool evlist__can_select_event(struct evlist *evlist, const char *str)
 {
        struct evlist *temp_evlist;
        struct evsel *evsel;
index 266760a..694b351 100644 (file)
@@ -22,6 +22,7 @@ struct record_opts {
        bool          raw_samples;
        bool          sample_address;
        bool          sample_phys_addr;
+       bool          sample_data_page_size;
        bool          sample_weight;
        bool          sample_time;
        bool          sample_time_set;
index f886199..078a717 100644 (file)
@@ -96,9 +96,9 @@
  * |    than PERF_RECORD_USER_TYPE_START) are handled by
  * |    perf_session__process_user_event(see below)
  * |  - Those generated by the kernel are handled by
- * |    perf_evlist__parse_sample_timestamp()
+ * |    evlist__parse_sample_timestamp()
  * |
- * perf_evlist__parse_sample_timestamp()
+ * evlist__parse_sample_timestamp()
  * |  Extract time stamp from sample data.
  * |
  * perf_session__queue_event()
@@ -932,7 +932,7 @@ s390_cpumsf_process_event(struct perf_session *session,
        if (event->header.type == PERF_RECORD_SAMPLE &&
            sample->raw_size) {
                /* Handle event with raw data */
-               ev_bc000 = perf_evlist__event2evsel(session->evlist, event);
+               ev_bc000 = evlist__event2evsel(session->evlist, event);
                if (ev_bc000 &&
                    ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG)
                        err = s390_cpumcf_dumpctr(sf, sample);
index 05b43ab..cfcf8d5 100644 (file)
@@ -197,15 +197,14 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample)
  * its raw data.
  * The function is only invoked when the dump flag -D is set.
  */
-void perf_evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event,
-                                 struct perf_sample *sample)
+void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
 {
        struct evsel *ev_bc000;
 
        if (event->header.type != PERF_RECORD_SAMPLE)
                return;
 
-       ev_bc000 = perf_evlist__event2evsel(evlist, event);
+       ev_bc000 = evlist__event2evsel(evlist, event);
        if (ev_bc000 == NULL ||
            ev_bc000->core.attr.config != PERF_EVENT_CPUM_CF_DIAG)
                return;
index e84bbe0..cde5cd3 100644 (file)
@@ -9,10 +9,10 @@
  * Check platform the perf data file was created on and perform platform
  * specific interpretation.
  */
-void perf_evlist__init_trace_event_sample_raw(struct evlist *evlist)
+void evlist__init_trace_event_sample_raw(struct evlist *evlist)
 {
        const char *arch_pf = perf_env__arch(evlist->env);
 
        if (arch_pf && !strcmp("s390", arch_pf))
-               evlist->trace_event_sample_raw = perf_evlist__s390_sample_raw;
+               evlist->trace_event_sample_raw = evlist__s390_sample_raw;
 }
index afe1491..4be84a5 100644 (file)
@@ -6,9 +6,6 @@ struct evlist;
 union perf_event;
 struct perf_sample;
 
-void perf_evlist__s390_sample_raw(struct evlist *evlist,
-                                 union perf_event *event,
-                                 struct perf_sample *sample);
-
-void perf_evlist__init_trace_event_sample_raw(struct evlist *evlist);
+void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
+void evlist__init_trace_event_sample_raw(struct evlist *evlist);
 #endif /* __PERF_EVLIST_H */
index 0980802..50ff979 100644 (file)
@@ -32,6 +32,7 @@
 #include "ui/progress.h"
 #include "../perf.h"
 #include "arch/common.h"
+#include "units.h"
 #include <internal/lib.h>
 
 #ifdef HAVE_ZSTD_SUPPORT
@@ -125,7 +126,7 @@ static int perf_session__open(struct perf_session *session)
                return -1;
        }
 
-       if (!perf_evlist__valid_read_format(session->evlist)) {
+       if (!evlist__valid_read_format(session->evlist)) {
                pr_err("non matching read_format\n");
                return -1;
        }
@@ -135,7 +136,7 @@ static int perf_session__open(struct perf_session *session)
 
 void perf_session__set_id_hdr_size(struct perf_session *session)
 {
-       u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist);
+       u16 id_hdr_size = evlist__id_hdr_size(session->evlist);
 
        machines__set_id_hdr_size(&session->machines, id_hdr_size);
 }
@@ -221,7 +222,7 @@ struct perf_session *perf_session__new(struct perf_data *data,
                                perf_session__set_comm_exec(session);
                        }
 
-                       perf_evlist__init_trace_event_sample_raw(session->evlist);
+                       evlist__init_trace_event_sample_raw(session->evlist);
 
                        /* Open the directory data. */
                        if (data->is_dir) {
@@ -1191,9 +1192,7 @@ static void stack_user__printf(struct stack_dump *dump)
               dump->size, dump->offset);
 }
 
-static void perf_evlist__print_tstamp(struct evlist *evlist,
-                                      union perf_event *event,
-                                      struct perf_sample *sample)
+static void evlist__print_tstamp(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
 {
        u64 sample_type = __evlist__combined_sample_type(evlist);
 
@@ -1254,16 +1253,25 @@ static void dump_event(struct evlist *evlist, union perf_event *event,
                evlist->trace_event_sample_raw(evlist, event, sample);
 
        if (sample)
-               perf_evlist__print_tstamp(evlist, event, sample);
+               evlist__print_tstamp(evlist, event, sample);
 
        printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
               event->header.size, perf_event__name(event->header.type));
 }
 
+char *get_page_size_name(u64 size, char *str)
+{
+       if (!size || !unit_number__scnprintf(str, PAGE_SIZE_NAME_LEN, size))
+               snprintf(str, PAGE_SIZE_NAME_LEN, "%s", "N/A");
+
+       return str;
+}
+
 static void dump_sample(struct evsel *evsel, union perf_event *event,
                        struct perf_sample *sample)
 {
        u64 sample_type;
+       char str[PAGE_SIZE_NAME_LEN];
 
        if (!dump_trace)
                return;
@@ -1298,6 +1306,9 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
        if (sample_type & PERF_SAMPLE_PHYS_ADDR)
                printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
 
+       if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+               printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str));
+
        if (sample_type & PERF_SAMPLE_TRANSACTION)
                printf("... transaction: %" PRIx64 "\n", sample->transaction);
 
@@ -1364,7 +1375,7 @@ static int deliver_sample_value(struct evlist *evlist,
                                struct sample_read_value *v,
                                struct machine *machine)
 {
-       struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id);
+       struct perf_sample_id *sid = evlist__id2sid(evlist, v->id);
        struct evsel *evsel;
 
        if (sid) {
@@ -1409,13 +1420,9 @@ static int deliver_sample_group(struct evlist *evlist,
        return ret;
 }
 
-static int
- perf_evlist__deliver_sample(struct evlist *evlist,
-                            struct perf_tool *tool,
-                            union  perf_event *event,
-                            struct perf_sample *sample,
-                            struct evsel *evsel,
-                            struct machine *machine)
+static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool,
+                                 union  perf_event *event, struct perf_sample *sample,
+                                 struct evsel *evsel, struct machine *machine)
 {
        /* We know evsel != NULL. */
        u64 sample_type = evsel->core.attr.sample_type;
@@ -1445,7 +1452,7 @@ static int machines__deliver_event(struct machines *machines,
 
        dump_event(evlist, event, file_offset, sample);
 
-       evsel = perf_evlist__id2evsel(evlist, sample->id);
+       evsel = evlist__id2evsel(evlist, sample->id);
 
        machine = machines__find_for_cpumode(machines, event, sample);
 
@@ -1460,7 +1467,7 @@ static int machines__deliver_event(struct machines *machines,
                        ++evlist->stats.nr_unprocessable_samples;
                        return 0;
                }
-               return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
+               return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
        case PERF_RECORD_MMAP:
                return tool->mmap(tool, event, sample, machine);
        case PERF_RECORD_MMAP2:
@@ -1523,9 +1530,8 @@ static int perf_session__deliver_event(struct perf_session *session,
                                       u64 file_offset)
 {
        struct perf_sample sample;
-       int ret;
+       int ret = evlist__parse_sample(session->evlist, event, &sample);
 
-       ret = perf_evlist__parse_sample(session->evlist, event, &sample);
        if (ret) {
                pr_err("Can't parse sample, err = %d\n", ret);
                return ret;
@@ -1697,7 +1703,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
 out_parse_sample:
 
        if (sample && event->header.type < PERF_RECORD_USER_TYPE_START &&
-           perf_evlist__parse_sample(session->evlist, event, sample))
+           evlist__parse_sample(session->evlist, event, sample))
                return -1;
 
        *event_ptr = event;
@@ -1754,7 +1760,7 @@ static s64 perf_session__process_event(struct perf_session *session,
        if (tool->ordered_events) {
                u64 timestamp = -1ULL;
 
-               ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+               ret = evlist__parse_sample_timestamp(evlist, event, &timestamp);
                if (ret && ret != -1)
                        return ret;
 
@@ -1937,7 +1943,6 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
 {
        struct ordered_events *oe = &session->ordered_events;
        struct perf_tool *tool = session->tool;
-       int fd = perf_data__fd(session->data);
        union perf_event *event;
        uint32_t size, cur_size = 0;
        void *buf = NULL;
@@ -1957,7 +1962,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
        ordered_events__set_copy_on_queue(oe, true);
 more:
        event = buf;
-       err = readn(fd, event, sizeof(struct perf_event_header));
+       err = perf_data__read(session->data, event,
+                             sizeof(struct perf_event_header));
        if (err <= 0) {
                if (err == 0)
                        goto done;
@@ -1989,7 +1995,8 @@ more:
        p += sizeof(struct perf_event_header);
 
        if (size - sizeof(struct perf_event_header)) {
-               err = readn(fd, p, size - sizeof(struct perf_event_header));
+               err = perf_data__read(session->data, p,
+                                     size - sizeof(struct perf_event_header));
                if (err <= 0) {
                        if (err == 0) {
                                pr_err("unexpected end of event stream\n");
@@ -2476,7 +2483,7 @@ int perf_event__process_id_index(struct perf_session *session,
                        fprintf(stdout, "  tid: %"PRI_ld64"\n", e->tid);
                }
 
-               sid = perf_evlist__id2sid(evlist, e->id);
+               sid = evlist__id2sid(evlist, e->id);
                if (!sid)
                        return -ENOENT;
                sid->idx = e->idx;
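
get_page_size_name() above formats a sampled page size through unit_number__scnprintf(), falling back to "N/A" when the size is zero. A rough stand-in for that scaling (assumed behavior, collapsing only exact multiples of 1024; not perf's implementation):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE_NAME_LEN 32

static char *page_size_name(uint64_t size, char *buf)
{
        static const char *units[] = { "B", "K", "M", "G", "T" };
        int i = 0;

        if (!size) {
                snprintf(buf, PAGE_SIZE_NAME_LEN, "N/A");
                return buf;
        }
        /* only scale while the value stays an exact multiple of 1024 */
        while (size >= 1024 && !(size % 1024) && i < 4) {
                size /= 1024;
                i++;
        }
        snprintf(buf, PAGE_SIZE_NAME_LEN, "%llu%s",
                 (unsigned long long)size, units[i]);
        return buf;
}

int main(void)
{
        char buf[PAGE_SIZE_NAME_LEN];

        printf("%s\n", page_size_name(4096, buf));       /* 4K  */
        printf("%s\n", page_size_name(2UL << 20, buf));  /* 2M  */
        printf("%s\n", page_size_name(0, buf));          /* N/A */
        return 0;
}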
index ded9ced..748371a 100644 (file)
@@ -12,8 +12,8 @@
 #include <sched.h>
 #include <stdbool.h>
 
-int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
-                             evsel__sb_cb_t cb, void *data)
+int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+                        evsel__sb_cb_t cb, void *data)
 {
        struct evsel *evsel;
 
@@ -62,7 +62,7 @@ static void *perf_evlist__poll_thread(void *arg)
                        if (perf_mmap__read_init(&map->core))
                                continue;
                        while ((event = perf_mmap__read_event(&map->core)) != NULL) {
-                               struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
+                               struct evsel *evsel = evlist__event2evsel(evlist, event);
 
                                if (evsel && evsel->side_band.cb)
                                        evsel->side_band.cb(event, evsel->side_band.data);
@@ -94,14 +94,14 @@ void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data)
       }
 }
 
-int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target)
+int evlist__start_sb_thread(struct evlist *evlist, struct target *target)
 {
        struct evsel *counter;
 
        if (!evlist)
                return 0;
 
-       if (perf_evlist__create_maps(evlist, target))
+       if (evlist__create_maps(evlist, target))
                goto out_delete_evlist;
 
        if (evlist->core.nr_entries > 1) {
@@ -110,7 +110,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target)
                evlist__for_each_entry(evlist, counter)
                        evsel__set_sample_id(counter, can_sample_identifier);
 
-               perf_evlist__set_id_pos(evlist);
+               evlist__set_id_pos(evlist);
        }
 
        evlist__for_each_entry(evlist, counter) {
@@ -138,7 +138,7 @@ out_delete_evlist:
        return -1;
 }
 
-void perf_evlist__stop_sb_thread(struct evlist *evlist)
+void evlist__stop_sb_thread(struct evlist *evlist)
 {
        if (!evlist)
                return;
index d42339d..80907bc 100644 (file)
@@ -1463,6 +1463,35 @@ struct sort_entry sort_mem_phys_daddr = {
 };
 
 static int64_t
+sort__data_page_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+       uint64_t l = 0, r = 0;
+
+       if (left->mem_info)
+               l = left->mem_info->daddr.data_page_size;
+       if (right->mem_info)
+               r = right->mem_info->daddr.data_page_size;
+
+       return (int64_t)(r - l);
+}
+
+static int hist_entry__data_page_size_snprintf(struct hist_entry *he, char *bf,
+                                         size_t size, unsigned int width)
+{
+       char str[PAGE_SIZE_NAME_LEN];
+
+       return repsep_snprintf(bf, size, "%-*s", width,
+                              get_page_size_name(he->mem_info->daddr.data_page_size, str));
+}
+
+struct sort_entry sort_mem_data_page_size = {
+       .se_header      = "Data Page Size",
+       .se_cmp         = sort__data_page_size_cmp,
+       .se_snprintf    = hist_entry__data_page_size_snprintf,
+       .se_width_idx   = HISTC_MEM_DATA_PAGE_SIZE,
+};
+
+static int64_t
 sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
 {
        if (!left->branch_info || !right->branch_info)
@@ -1740,6 +1769,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
        DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
        DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
        DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
+       DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size),
 };
 
 #undef DIM
@@ -2756,7 +2786,7 @@ static const char *get_default_sort_order(struct evlist *evlist)
 
        BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders));
 
-       if (evlist == NULL || perf_evlist__empty(evlist))
+       if (evlist == NULL || evlist__empty(evlist))
                goto out_no_evlist;
 
        evlist__for_each_entry(evlist, evsel) {
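
The data_page_size sort key above plugs into perf's table of sort dimensions as a comparator/formatter pair. A stripped-down sketch of that table-driven pattern (struct names here are illustrative, not perf's):

#include <stdio.h>
#include <stdint.h>

struct entry { uint64_t page_size; };

struct dimension {
        const char *name;
        int64_t (*cmp)(const struct entry *l, const struct entry *r);
        int (*snprint)(const struct entry *e, char *buf, size_t sz);
};

static int64_t page_size_cmp(const struct entry *l, const struct entry *r)
{
        return (int64_t)(r->page_size - l->page_size);
}

static int page_size_snprint(const struct entry *e, char *buf, size_t sz)
{
        return snprintf(buf, sz, "%llu", (unsigned long long)e->page_size);
}

static const struct dimension dims[] = {
        { "data_page_size", page_size_cmp, page_size_snprint },
};

int main(void)
{
        struct entry a = { 4096 }, b = { 2097152 };
        char buf[32];

        printf("cmp=%lld\n", (long long)dims[0].cmp(&a, &b));
        dims[0].snprint(&b, buf, sizeof(buf));
        printf("b=%s\n", buf);
        return 0;
}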
index 66d39c4..e50f2b6 100644 (file)
@@ -255,6 +255,7 @@ enum sort_type {
        SORT_MEM_DCACHELINE,
        SORT_MEM_IADDR_SYMBOL,
        SORT_MEM_PHYS_DADDR,
+       SORT_MEM_DATA_PAGE_SIZE,
 };
 
 /*
index a963b5b..fee7543 100644 (file)
@@ -1184,12 +1184,8 @@ static void print_percore(struct perf_stat_config *config,
                fputc('\n', output);
 }
 
-void
-perf_evlist__print_counters(struct evlist *evlist,
-                           struct perf_stat_config *config,
-                           struct target *_target,
-                           struct timespec *ts,
-                           int argc, const char **argv)
+void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
+                           struct target *_target, struct timespec *ts, int argc, const char **argv)
 {
        bool metric_only = config->metric_only;
        int interval = config->interval;
index bd0decd..1e125e3 100644 (file)
@@ -184,7 +184,7 @@ static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
        return 0;
 }
 
-int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
+int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
 {
        struct evsel *evsel;
 
@@ -196,11 +196,11 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
        return 0;
 
 out_free:
-       perf_evlist__free_stats(evlist);
+       evlist__free_stats(evlist);
        return -1;
 }
 
-void perf_evlist__free_stats(struct evlist *evlist)
+void evlist__free_stats(struct evlist *evlist)
 {
        struct evsel *evsel;
 
@@ -211,7 +211,7 @@ void perf_evlist__free_stats(struct evlist *evlist)
        }
 }
 
-void perf_evlist__reset_stats(struct evlist *evlist)
+void evlist__reset_stats(struct evlist *evlist)
 {
        struct evsel *evsel;
 
@@ -221,7 +221,7 @@ void perf_evlist__reset_stats(struct evlist *evlist)
        }
 }
 
-void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
+void evlist__reset_prev_raw_counts(struct evlist *evlist)
 {
        struct evsel *evsel;
 
@@ -229,7 +229,7 @@ void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
                evsel__reset_prev_raw_counts(evsel);
 }
 
-static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel)
+static void evsel__copy_prev_raw_counts(struct evsel *evsel)
 {
        int ncpus = evsel__nr_cpus(evsel);
        int nthreads = perf_thread_map__nr(evsel->core.threads);
@@ -245,15 +245,15 @@ static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel)
        evsel->counts->aggr = evsel->prev_raw_counts->aggr;
 }
 
-void perf_evlist__copy_prev_raw_counts(struct evlist *evlist)
+void evlist__copy_prev_raw_counts(struct evlist *evlist)
 {
        struct evsel *evsel;
 
        evlist__for_each_entry(evlist, evsel)
-               perf_evsel__copy_prev_raw_counts(evsel);
+               evsel__copy_prev_raw_counts(evsel);
 }
 
-void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
+void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
 {
        struct evsel *evsel;
 
@@ -458,7 +458,7 @@ int perf_event__process_stat_event(struct perf_session *session,
        count.ena = st->ena;
        count.run = st->run;
 
-       counter = perf_evlist__id2evsel(session->evlist, st->id);
+       counter = evlist__id2evsel(session->evlist, st->id);
        if (!counter) {
                pr_err("Failed to resolve counter for stat event.\n");
                return -EINVAL;
index 487010c..9979b4b 100644 (file)
@@ -122,6 +122,7 @@ struct perf_stat_config {
        bool                     metric_no_group;
        bool                     metric_no_merge;
        bool                     stop_read_counter;
+       bool                     quiet;
        FILE                    *output;
        unsigned int             interval;
        unsigned int             timeout;
@@ -212,12 +213,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
                                   struct runtime_stat *st);
 void perf_stat__collect_metric_expr(struct evlist *);
 
-int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
-void perf_evlist__free_stats(struct evlist *evlist);
-void perf_evlist__reset_stats(struct evlist *evlist);
-void perf_evlist__reset_prev_raw_counts(struct evlist *evlist);
-void perf_evlist__copy_prev_raw_counts(struct evlist *evlist);
-void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
+int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
+void evlist__free_stats(struct evlist *evlist);
+void evlist__reset_stats(struct evlist *evlist);
+void evlist__reset_prev_raw_counts(struct evlist *evlist);
+void evlist__copy_prev_raw_counts(struct evlist *evlist);
+void evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
 
 int perf_stat_process_counter(struct perf_stat_config *config,
                              struct evsel *counter);
@@ -237,12 +238,8 @@ int create_perf_stat_counter(struct evsel *evsel,
                             struct perf_stat_config *config,
                             struct target *target,
                             int cpu);
-void
-perf_evlist__print_counters(struct evlist *evlist,
-                           struct perf_stat_config *config,
-                           struct target *_target,
-                           struct timespec *ts,
-                           int argc, const char **argv);
+void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
+                           struct target *_target, struct timespec *ts, int argc, const char **argv);
 
 struct metric_expr;
 double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
index 44dd86a..f3577f7 100644 (file)
@@ -534,7 +534,7 @@ out:
 
 #ifdef HAVE_LIBBFD_BUILDID_SUPPORT
 
-int filename__read_build_id(const char *filename, struct build_id *bid)
+static int read_build_id(const char *filename, struct build_id *bid)
 {
        size_t size = sizeof(bid->data);
        int err = -1;
@@ -563,7 +563,7 @@ out_close:
 
 #else // HAVE_LIBBFD_BUILDID_SUPPORT
 
-int filename__read_build_id(const char *filename, struct build_id *bid)
+static int read_build_id(const char *filename, struct build_id *bid)
 {
        size_t size = sizeof(bid->data);
        int fd, err = -1;
@@ -595,6 +595,39 @@ out:
 
 #endif // HAVE_LIBBFD_BUILDID_SUPPORT
 
+int filename__read_build_id(const char *filename, struct build_id *bid)
+{
+       struct kmod_path m = { .name = NULL, };
+       char path[PATH_MAX];
+       int err;
+
+       if (!filename)
+               return -EFAULT;
+
+       err = kmod_path__parse(&m, filename);
+       if (err)
+               return -1;
+
+       if (m.comp) {
+               int error = 0, fd;
+
+               fd = filename__decompress(filename, path, sizeof(path), m.comp, &error);
+               if (fd < 0) {
+                       pr_debug("Failed to decompress (error %d) %s\n",
+                                error, filename);
+                       return -1;
+               }
+               close(fd);
+               filename = path;
+       }
+
+       err = read_build_id(filename, bid);
+
+       if (m.comp)
+               unlink(filename);
+       return err;
+}
+
 int sysfs__read_build_id(const char *filename, struct build_id *bid)
 {
        size_t size = sizeof(bid->data);
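
The filename__read_build_id() wrapper added above handles compressed kernel modules by decompressing to a temporary file, parsing that copy, and unlinking it afterwards. A self-contained sketch of the flow, where decompress() and parse() are hypothetical stand-ins for filename__decompress() and read_build_id():

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static int decompress(const char *src, char *dst, size_t len)
{
        (void)src;                      /* a real decompressor would read src */
        snprintf(dst, len, "/tmp/bid-XXXXXX");
        return mkstemp(dst);            /* stands in for the real helper */
}

static int parse(const char *path)
{
        return access(path, R_OK) ? -1 : 0;
}

static int read_possibly_compressed(const char *filename, int compressed)
{
        char path[4096];
        int err, fd;

        if (compressed) {
                fd = decompress(filename, path, sizeof(path));
                if (fd < 0)
                        return -1;
                close(fd);
                filename = path;        /* parse the temporary copy */
        }

        err = parse(filename);

        if (compressed)
                unlink(filename);       /* always drop the temporary file */
        return err;
}

int main(void)
{
        return read_possibly_compressed("/bin/true", 0) ? 1 : 0;
}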
index 0d14abd..64a039c 100644 (file)
@@ -2189,6 +2189,8 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map)
        int err;
        const char *kallsyms_filename = NULL;
        char *kallsyms_allocated_filename = NULL;
+       char *filename = NULL;
+
        /*
         * Step 1: if the user specified a kallsyms or vmlinux filename, use
         * it and only it, reporting errors to the user if it cannot be used.
@@ -2213,6 +2215,20 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map)
                return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, false);
        }
 
+       /*
+        * Before checking the common vmlinux locations, check whether the
+        * kernel image is stored as a standard build-id binary (not
+        * kallsyms) under the .debug cache.
+        */
+       if (!symbol_conf.ignore_vmlinux_buildid)
+               filename = __dso__build_id_filename(dso, NULL, 0, false, false);
+       if (filename != NULL) {
+               err = dso__load_vmlinux(dso, map, filename, true);
+               if (err > 0)
+                       return err;
+               free(filename);
+       }
+
        if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) {
                err = dso__load_vmlinux_path(dso, map);
                if (err > 0)
index d9c6243..2947e3f 100644 (file)
@@ -1409,6 +1409,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
        if (type & PERF_SAMPLE_CGROUP)
                result += sizeof(u64);
 
+       if (type & PERF_SAMPLE_DATA_PAGE_SIZE)
+               result += sizeof(u64);
+
        if (type & PERF_SAMPLE_AUX) {
                result += sizeof(u64);
                result += sample->aux_sample.size;
@@ -1588,6 +1591,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
                array++;
        }
 
+       if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
+               *array = sample->data_page_size;
+               array++;
+       }
+
        if (type & PERF_SAMPLE_AUX) {
                sz = sample->aux_sample.size;
                *array++ = sz;
@@ -1643,7 +1651,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_
 
                        e->id = evsel->core.id[j];
 
-                       sid = perf_evlist__id2sid(evlist, e->id);
+                       sid = evlist__id2sid(evlist, e->id);
                        if (!sid) {
                                free(ev);
                                return -ENOENT;
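
These PERF_SAMPLE_DATA_PAGE_SIZE hunks show the invariant behind perf's variable-layout sample records: the size calculation and the serializer must test the same flags in the same order, or a reader walks off the record. A toy version of that flag-gated layout:

#include <stdint.h>
#include <stdio.h>

#define SAMPLE_ADDR  (1u << 0)
#define SAMPLE_PAGE  (1u << 1)

static size_t sample_size(uint32_t type)
{
        size_t sz = 0;

        if (type & SAMPLE_ADDR)
                sz += sizeof(uint64_t);
        if (type & SAMPLE_PAGE)
                sz += sizeof(uint64_t);
        return sz;
}

static uint64_t *serialize(uint64_t *array, uint32_t type,
                           uint64_t addr, uint64_t page)
{
        if (type & SAMPLE_ADDR)
                *array++ = addr;
        if (type & SAMPLE_PAGE)
                *array++ = page;        /* order must match the parser */
        return array;
}

int main(void)
{
        uint64_t buf[2];
        uint32_t type = SAMPLE_ADDR | SAMPLE_PAGE;

        serialize(buf, type, 0x1000, 4096);
        printf("size=%zu first=%#llx\n", sample_size(type),
               (unsigned long long)buf[0]);
        return 0;
}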
index 7a3dbc2..0ada907 100644 (file)
 
 static char *debuginfo_path;
 
+static int __find_debuginfo(Dwfl_Module *mod __maybe_unused, void **userdata,
+                           const char *modname __maybe_unused, Dwarf_Addr base __maybe_unused,
+                           const char *file_name, const char *debuglink_file __maybe_unused,
+                           GElf_Word debuglink_crc __maybe_unused, char **debuginfo_file_name)
+{
+       const struct dso *dso = *userdata;
+
+       assert(dso);
+       if (dso->symsrc_filename && strcmp(file_name, dso->symsrc_filename))
+               *debuginfo_file_name = strdup(dso->symsrc_filename);
+       return -1;
+}
+
 static const Dwfl_Callbacks offline_callbacks = {
-       .find_debuginfo         = dwfl_standard_find_debuginfo,
+       .find_debuginfo         = __find_debuginfo,
        .debuginfo_path         = &debuginfo_path,
        .section_address        = dwfl_offline_section_address,
+       // .find_elf is not set as we use dwfl_report_elf() instead.
 };
 
 static int __report_module(struct addr_location *al, u64 ip,
@@ -46,16 +60,24 @@ static int __report_module(struct addr_location *al, u64 ip,
        mod = dwfl_addrmodule(ui->dwfl, ip);
        if (mod) {
                Dwarf_Addr s;
+               void **userdatap;
 
-               dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
+               dwfl_module_info(mod, &userdatap, &s, NULL, NULL, NULL, NULL, NULL);
+               *userdatap = dso;
                if (s != al->map->start - al->map->pgoff)
                        mod = 0;
        }
 
        if (!mod)
-               mod = dwfl_report_elf(ui->dwfl, dso->short_name,
-                                     (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff,
-                                     false);
+               mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1,
+                                     al->map->start - al->map->pgoff, false);
+       if (!mod) {
+               char filename[PATH_MAX];
+
+               if (dso__build_id_filename(dso, filename, sizeof(filename), false))
+                       mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
+                                             al->map->start - al->map->pgoff, false);
+       }
 
        return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
 }
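
The __find_debuginfo callback above steers elfutils at dso->symsrc_filename before its default search. A minimal sketch of wiring such a callback into a Dwfl session, assuming elfutils headers and linking with -ldw (the callback body here is a deliberate no-op):

#include <elfutils/libdwfl.h>
#include <stdio.h>

static char *debuginfo_path;

static int find_debuginfo(Dwfl_Module *mod, void **userdata,
                          const char *modname, Dwarf_Addr base,
                          const char *file_name, const char *debuglink_file,
                          GElf_Word debuglink_crc, char **debuginfo_file_name)
{
        /* Returning -1 with *debuginfo_file_name unset means "not found". */
        (void)mod; (void)userdata; (void)modname; (void)base;
        (void)file_name; (void)debuglink_file; (void)debuglink_crc;
        (void)debuginfo_file_name;
        return -1;
}

static const Dwfl_Callbacks callbacks = {
        .find_debuginfo  = find_debuginfo,
        .debuginfo_path  = &debuginfo_path,
        .section_address = dwfl_offline_section_address,
};

int main(void)
{
        Dwfl *dwfl = dwfl_begin(&callbacks);

        if (!dwfl)
                return 1;
        dwfl_end(dwfl);
        return 0;
}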
index 3c47865..e15e206 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0-only
 # -*- coding: utf-8 -*-
 #
index 4118eb4..ebea21d 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0
 
 open (IN,"ktest.pl");
index 54188ee..4e24509 100755 (executable)
@@ -1499,17 +1499,16 @@ sub dodie {
        my $log_file;
 
        if (defined($opt{"LOG_FILE"})) {
-           my $whence = 0; # beginning of file
-           my $pos = $test_log_start;
+           my $whence = 2; # End of file
+           my $log_size = tell LOG;
+           my $size = $log_size - $test_log_start;
 
            if (defined($mail_max_size)) {
-               my $log_size = tell LOG;
-               $log_size -= $test_log_start;
-               if ($log_size > $mail_max_size) {
-                   $whence = 2; # end of file
-                   $pos = - $mail_max_size;
+               if ($size > $mail_max_size) {
+                   $size = $mail_max_size;
                }
            }
+           my $pos = - $size;
            $log_file = "$tmpdir/log";
            open (L, "$opt{LOG_FILE}") or die "Can't open $opt{LOG_FILE} to read";
            open (O, "> $tmpdir/log") or die "Can't open $tmpdir/log\n";
@@ -4253,7 +4252,12 @@ sub do_send_mail {
     $mail_command =~ s/\$SUBJECT/$subject/g;
     $mail_command =~ s/\$MESSAGE/$message/g;
 
-    run_command $mail_command;
+    my $ret = run_command $mail_command;
+    if (!$ret && defined($file)) {
+       # try again without the file
+       $message .= "\n\n*** FAILED TO SEND LOG ***\n\n";
+       do_send_email($subject, $message);
+    }
 }
 
 sub send_email {
index d4f7846..21516e2 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 #
 # A thin wrapper on top of the KUnit Kernel
index 497ab51..b593f44 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 #
 # A collection of tests for tools/testing/kunit/kunit.py
index 2480226..0b3af55 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # Copyright (C) 2020 ARM Limited
 
-CFLAGS += -std=gnu99 -I.
+CFLAGS += -std=gnu99 -I. -lpthread
 SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
 PROGS := $(patsubst %.c,%,$(SRCS))
 
diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
new file mode 100644 (file)
index 0000000..a876db1
--- /dev/null
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+
+#define PR_SET_TAGGED_ADDR_CTRL 55
+#define PR_GET_TAGGED_ADDR_CTRL 56
+# define PR_TAGGED_ADDR_ENABLE  (1UL << 0)
+# define PR_MTE_TCF_SHIFT      1
+# define PR_MTE_TCF_NONE       (0UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_SYNC       (1UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_ASYNC      (2UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_MASK       (3UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TAG_SHIFT      3
+# define PR_MTE_TAG_MASK       (0xffffUL << PR_MTE_TAG_SHIFT)
+
+#include "mte_def.h"
+
+#define NUM_ITERATIONS         1024
+#define MAX_THREADS            5
+#define THREAD_ITERATIONS      1000
+
+void *execute_thread(void *x)
+{
+       pid_t pid = *((pid_t *)x);
+       pid_t tid = gettid();
+       uint64_t prctl_tag_mask;
+       uint64_t prctl_set;
+       uint64_t prctl_get;
+       uint64_t prctl_tcf;
+
+       srand(time(NULL) ^ (pid << 16) ^ (tid << 16));
+
+       prctl_tag_mask = rand() & 0xffff;
+
+       if (prctl_tag_mask % 2)
+               prctl_tcf = PR_MTE_TCF_SYNC;
+       else
+               prctl_tcf = PR_MTE_TCF_ASYNC;
+
+       prctl_set = PR_TAGGED_ADDR_ENABLE | prctl_tcf | (prctl_tag_mask << PR_MTE_TAG_SHIFT);
+
+       for (int j = 0; j < THREAD_ITERATIONS; j++) {
+               if (prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_set, 0, 0, 0)) {
+                       perror("prctl() failed");
+                       goto fail;
+               }
+
+               prctl_get = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+
+               if (prctl_set != prctl_get) {
+                       ksft_print_msg("Error: prctl_set: 0x%lx != prctl_get: 0x%lx\n",
+                                               prctl_set, prctl_get);
+                       goto fail;
+               }
+       }
+
+       return (void *)KSFT_PASS;
+
+fail:
+       return (void *)KSFT_FAIL;
+}
+
+int execute_test(pid_t pid)
+{
+       pthread_t thread_id[MAX_THREADS];
+       int thread_data[MAX_THREADS];
+
+       for (int i = 0; i < MAX_THREADS; i++)
+               pthread_create(&thread_id[i], NULL,
+                              execute_thread, (void *)&pid);
+
+       for (int i = 0; i < MAX_THREADS; i++)
+               pthread_join(thread_id[i], (void *)&thread_data[i]);
+
+       for (int i = 0; i < MAX_THREADS; i++)
+               if (thread_data[i] == KSFT_FAIL)
+                       return KSFT_FAIL;
+
+       return KSFT_PASS;
+}
+
+int mte_gcr_fork_test(void)
+{
+       pid_t pid;
+       int results[NUM_ITERATIONS];
+       pid_t cpid;
+       int res;
+
+       for (int i = 0; i < NUM_ITERATIONS; i++) {
+               pid = fork();
+
+               if (pid < 0)
+                       return KSFT_FAIL;
+
+               if (pid == 0) {
+                       cpid = getpid();
+
+                       res = execute_test(cpid);
+
+                       exit(res);
+               }
+       }
+
+       for (int i = 0; i < NUM_ITERATIONS; i++) {
+               wait(&res);
+
+               if (WIFEXITED(res))
+                       results[i] = WEXITSTATUS(res);
+               else
+                       --i;
+       }
+
+       for (int i = 0; i < NUM_ITERATIONS; i++)
+               if (results[i] == KSFT_FAIL)
+                       return KSFT_FAIL;
+
+       return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+       int err;
+
+       err = mte_default_setup();
+       if (err)
+               return err;
+
+       ksft_set_plan(1);
+
+       evaluate_test(mte_gcr_fork_test(),
+               "Verify that GCR_EL1 is set correctly on context switch\n");
+
+       mte_restore_setup();
+       ksft_print_cnts();
+
+       return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
index b99bb8e..edaffd4 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 
 # Copyright (C) 2017 Netronome Systems, Inc.
 # Copyright (c) 2019 Mellanox Technologies. All rights reserved
index 87e16d6..73eb29c 100644 (file)
 #include "../clone3/clone3_selftests.h"
 
 #ifndef __NR_close_range
-#define __NR_close_range -1
+       #if defined __alpha__
+               #define __NR_close_range 546
+       #elif defined _MIPS_SIM
+               #if _MIPS_SIM == _MIPS_SIM_ABI32        /* o32 */
+                       #define __NR_close_range (436 + 4000)
+               #endif
+               #if _MIPS_SIM == _MIPS_SIM_NABI32       /* n32 */
+                       #define __NR_close_range (436 + 6000)
+               #endif
+               #if _MIPS_SIM == _MIPS_SIM_ABI64        /* n64 */
+                       #define __NR_close_range (436 + 5000)
+               #endif
+       #elif defined __ia64__
+               #define __NR_close_range (436 + 1024)
+       #else
+               #define __NR_close_range 436
+       #endif
 #endif
 
 #ifndef CLOSE_RANGE_UNSHARE
@@ -102,7 +118,7 @@ TEST(close_range_unshare)
        int i, ret, status;
        pid_t pid;
        int open_fds[101];
-       struct clone_args args = {
+       struct __clone_args args = {
                .flags = CLONE_FILES,
                .exit_signal = SIGCHLD,
        };
@@ -191,7 +207,7 @@ TEST(close_range_unshare_capped)
        int i, ret, status;
        pid_t pid;
        int open_fds[101];
-       struct clone_args args = {
+       struct __clone_args args = {
                .flags = CLONE_FILES,
                .exit_signal = SIGCHLD,
        };
@@ -241,7 +257,7 @@ TEST(close_range_cloexec)
                fd = open("/dev/null", O_RDONLY);
                ASSERT_GE(fd, 0) {
                        if (errno == ENOENT)
-                               XFAIL(return, "Skipping test since /dev/null does not exist");
+                               SKIP(return, "Skipping test since /dev/null does not exist");
                }
 
                open_fds[i] = fd;
@@ -250,9 +266,9 @@ TEST(close_range_cloexec)
        ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
        if (ret < 0) {
                if (errno == ENOSYS)
-                       XFAIL(return, "close_range() syscall not supported");
+                       SKIP(return, "close_range() syscall not supported");
                if (errno == EINVAL)
-                       XFAIL(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
+                       SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
        }
 
        /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
@@ -297,5 +313,258 @@ TEST(close_range_cloexec)
        }
 }
 
+TEST(close_range_cloexec_unshare)
+{
+       int i, ret;
+       int open_fds[101];
+       struct rlimit rlimit;
+
+       for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+               int fd;
+
+               fd = open("/dev/null", O_RDONLY);
+               ASSERT_GE(fd, 0) {
+                       if (errno == ENOENT)
+                               SKIP(return, "Skipping test since /dev/null does not exist");
+               }
+
+               open_fds[i] = fd;
+       }
+
+       ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
+       if (ret < 0) {
+               if (errno == ENOSYS)
+                       SKIP(return, "close_range() syscall not supported");
+               if (errno == EINVAL)
+                       SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
+       }
+
+       /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
+       ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
+       rlimit.rlim_cur = 25;
+       ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
+
+       /* Set close-on-exec for two ranges: [0-50] and [75-100].  */
+       ret = sys_close_range(open_fds[0], open_fds[50],
+                             CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
+       ASSERT_EQ(0, ret);
+       ret = sys_close_range(open_fds[75], open_fds[100],
+                             CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
+       ASSERT_EQ(0, ret);
+
+       for (i = 0; i <= 50; i++) {
+               int flags = fcntl(open_fds[i], F_GETFD);
+
+               EXPECT_GT(flags, -1);
+               EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+       }
+
+       for (i = 51; i <= 74; i++) {
+               int flags = fcntl(open_fds[i], F_GETFD);
+
+               EXPECT_GT(flags, -1);
+               EXPECT_EQ(flags & FD_CLOEXEC, 0);
+       }
+
+       for (i = 75; i <= 100; i++) {
+               int flags = fcntl(open_fds[i], F_GETFD);
+
+               EXPECT_GT(flags, -1);
+               EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+       }
+
+       /* Test a common pattern.  */
+       ret = sys_close_range(3, UINT_MAX,
+                             CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
+       for (i = 0; i <= 100; i++) {
+               int flags = fcntl(open_fds[i], F_GETFD);
+
+               EXPECT_GT(flags, -1);
+               EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+       }
+}
+
+/*
+ * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
+ */
+TEST(close_range_cloexec_syzbot)
+{
+       int fd1, fd2, fd3, flags, ret, status;
+       pid_t pid;
+       struct __clone_args args = {
+               .flags = CLONE_FILES,
+               .exit_signal = SIGCHLD,
+       };
+
+       /* Create a huge gap in the fd table. */
+       fd1 = open("/dev/null", O_RDWR);
+       EXPECT_GT(fd1, 0);
+
+       fd2 = dup2(fd1, 1000);
+       EXPECT_GT(fd2, 0);
+
+       pid = sys_clone3(&args, sizeof(args));
+       ASSERT_GE(pid, 0);
+
+       if (pid == 0) {
+               ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
+               if (ret)
+                       exit(EXIT_FAILURE);
+
+               /*
+                * The fd table is still shared with the parent, so all
+                * our open fds should still be open but are now marked
+                * close-on-exec.
+                */
+               flags = fcntl(fd1, F_GETFD);
+               EXPECT_GT(flags, -1);
+               EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+               flags = fcntl(fd2, F_GETFD);
+               EXPECT_GT(flags, -1);
+               EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+               fd3 = dup2(fd1, 42);
+               EXPECT_GT(fd3, 0);
+
+               /*
+                * Duplicating the file descriptor must remove the
+                * FD_CLOEXEC flag.
+                */
+               flags = fcntl(fd3, F_GETFD);
+               EXPECT_GT(flags, -1);
+               EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+               exit(EXIT_SUCCESS);
+       }
+
+       EXPECT_EQ(waitpid(pid, &status, 0), pid);
+       EXPECT_EQ(true, WIFEXITED(status));
+       EXPECT_EQ(0, WEXITSTATUS(status));
+
+       /*
+        * We had a shared file descriptor table along with requesting
+        * close-on-exec, so the original fds must now be close-on-exec
+        * as well.
+        */
+       flags = fcntl(fd1, F_GETFD);
+       EXPECT_GT(flags, -1);
+       EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+       flags = fcntl(fd2, F_GETFD);
+       EXPECT_GT(flags, -1);
+       EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+       fd3 = dup2(fd1, 42);
+       EXPECT_GT(fd3, 0);
+
+       flags = fcntl(fd3, F_GETFD);
+       EXPECT_GT(flags, -1);
+       EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+       EXPECT_EQ(close(fd1), 0);
+       EXPECT_EQ(close(fd2), 0);
+       EXPECT_EQ(close(fd3), 0);
+}
+
+/*
+ * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
+ */
+TEST(close_range_cloexec_unshare_syzbot)
+{
+       int i, fd1, fd2, fd3, flags, ret, status;
+       pid_t pid;
+       struct __clone_args args = {
+               .flags = CLONE_FILES,
+               .exit_signal = SIGCHLD,
+       };
+
+       /*
+        * Create a huge gap in the fd table. When we now call
+        * CLOSE_RANGE_UNSHARE with a shared fd table and with ~0U as upper
+        * bound the kernel will only copy up to fd1 file descriptors into the
+        * new fd table. If the kernel is buggy and doesn't handle
+        * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
+        * descriptors and we will oops!
+        *
+        * On a buggy kernel this should immediately oops. But let's loop just
+        * to be sure.
+        */
+       fd1 = open("/dev/null", O_RDWR);
+       EXPECT_GT(fd1, 0);
+
+       fd2 = dup2(fd1, 1000);
+       EXPECT_GT(fd2, 0);
+
+       for (i = 0; i < 100; i++) {
+
+               pid = sys_clone3(&args, sizeof(args));
+               ASSERT_GE(pid, 0);
+
+               if (pid == 0) {
+                       ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
+                                                     CLOSE_RANGE_CLOEXEC);
+                       if (ret)
+                               exit(EXIT_FAILURE);
+
+                       /*
+                        * We now have a private file descriptor table and all
+                        * our open fds should still be open but made
+                        * close-on-exec.
+                        */
+                       flags = fcntl(fd1, F_GETFD);
+                       EXPECT_GT(flags, -1);
+                       EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+                       flags = fcntl(fd2, F_GETFD);
+                       EXPECT_GT(flags, -1);
+                       EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+                       fd3 = dup2(fd1, 42);
+                       EXPECT_GT(fd3, 0);
+
+                       /*
+                        * Duplicating the file descriptor must remove the
+                        * FD_CLOEXEC flag.
+                        */
+                       flags = fcntl(fd3, F_GETFD);
+                       EXPECT_GT(flags, -1);
+                       EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+                       EXPECT_EQ(close(fd1), 0);
+                       EXPECT_EQ(close(fd2), 0);
+                       EXPECT_EQ(close(fd3), 0);
+
+                       exit(EXIT_SUCCESS);
+               }
+
+               EXPECT_EQ(waitpid(pid, &status, 0), pid);
+               EXPECT_EQ(true, WIFEXITED(status));
+               EXPECT_EQ(0, WEXITSTATUS(status));
+       }
+
+       /*
+        * We created a private file descriptor table before along with
+        * requesting close-on-exec so the original fds must not be
+        * close-on-exec.
+        */
+       flags = fcntl(fd1, F_GETFD);
+       EXPECT_GT(flags, -1);
+       EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+       flags = fcntl(fd2, F_GETFD);
+       EXPECT_GT(flags, -1);
+       EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+       fd3 = dup2(fd1, 42);
+       EXPECT_GT(fd3, 0);
+
+       flags = fcntl(fd3, F_GETFD);
+       EXPECT_GT(flags, -1);
+       EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+       EXPECT_EQ(close(fd1), 0);
+       EXPECT_EQ(close(fd2), 0);
+       EXPECT_EQ(close(fd3), 0);
+}
 
 TEST_HARNESS_MAIN
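
The per-architecture __NR_close_range fallbacks near the top of this file keep the test buildable against older UAPI headers; the syscall itself goes through a thin syscall(2) wrapper. A self-contained sketch of that wrapper (436 is the generic syscall number; the exotic ABIs differ, as the table above shows):

#define _GNU_SOURCE
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

#ifndef __NR_close_range
#define __NR_close_range 436    /* generic number; see the ABI table above */
#endif

static int sys_close_range(unsigned int fd, unsigned int max_fd,
                           unsigned int flags)
{
        return syscall(__NR_close_range, fd, max_fd, flags);
}

int main(void)
{
        /* Closes every fd >= 3; harmless right before exit. */
        if (sys_close_range(3, ~0U, 0) < 0 && errno == ENOSYS)
                fprintf(stderr, "close_range() not supported here\n");
        return 0;
}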
diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile
new file mode 100644 (file)
index 0000000..aa8e8b5
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := dma_map_benchmark
+
+include ../lib.mk
diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config
new file mode 100644 (file)
index 0000000..6102ee3
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_DMA_MAP_BENCHMARK=y
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
new file mode 100644 (file)
index 0000000..7065163
--- /dev/null
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Hisilicon Limited.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/types.h>
+
+#define DMA_MAP_BENCHMARK      _IOWR('d', 1, struct map_benchmark)
+#define DMA_MAP_MAX_THREADS    1024
+#define DMA_MAP_MAX_SECONDS     300
+
+#define DMA_MAP_BIDIRECTIONAL  0
+#define DMA_MAP_TO_DEVICE      1
+#define DMA_MAP_FROM_DEVICE    2
+
+static char *directions[] = {
+       "BIDIRECTIONAL",
+       "TO_DEVICE",
+       "FROM_DEVICE",
+};
+
+struct map_benchmark {
+       __u64 avg_map_100ns; /* average map latency in 100ns */
+       __u64 map_stddev; /* standard deviation of map latency */
+       __u64 avg_unmap_100ns; /* as above */
+       __u64 unmap_stddev;
+       __u32 threads; /* how many threads will do map/unmap in parallel */
+       __u32 seconds; /* how long the test will last */
+       __s32 node; /* which numa node this benchmark will run on */
+       __u32 dma_bits; /* DMA addressing capability */
+       __u32 dma_dir; /* DMA data direction */
+       __u64 expansion[10];    /* For future use */
+};
+
+int main(int argc, char **argv)
+{
+       struct map_benchmark map;
+       int fd, opt;
+       /* default single thread, run 20 seconds on NUMA_NO_NODE */
+       int threads = 1, seconds = 20, node = -1;
+       /* default dma mask 32bit, bidirectional DMA */
+       int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;
+
+       int cmd = DMA_MAP_BENCHMARK;
+       char *p;
+
+       while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
+               switch (opt) {
+               case 't':
+                       threads = atoi(optarg);
+                       break;
+               case 's':
+                       seconds = atoi(optarg);
+                       break;
+               case 'n':
+                       node = atoi(optarg);
+                       break;
+               case 'b':
+                       bits = atoi(optarg);
+                       break;
+               case 'd':
+                       dir = atoi(optarg);
+                       break;
+               default:
+                       return -1;
+               }
+       }
+
+       if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
+               fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
+                       DMA_MAP_MAX_THREADS);
+               exit(1);
+       }
+
+       if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) {
+               fprintf(stderr, "invalid number of seconds, must be in 1-%d\n",
+                       DMA_MAP_MAX_SECONDS);
+               exit(1);
+       }
+
+       /* assume the minimum DMA zone anywhere is 1MB */
+       if (bits < 20 || bits > 64) {
+               fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
+               exit(1);
+       }
+
+       if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE &&
+                       dir != DMA_MAP_FROM_DEVICE) {
+               fprintf(stderr, "invalid dma direction\n");
+               exit(1);
+       }
+
+       fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
+       if (fd == -1) {
+               perror("open");
+               exit(1);
+       }
+
+       map.seconds = seconds;
+       map.threads = threads;
+       map.node = node;
+       map.dma_bits = bits;
+       map.dma_dir = dir;
+       if (ioctl(fd, cmd, &map)) {
+               perror("ioctl");
+               exit(1);
+       }
+
+       printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n",
+                       threads, seconds, node, directions[dir]);
+       printf("average map latency(us):%.1f standard deviation:%.1f\n",
+                       map.avg_map_100ns/10.0, map.map_stddev/10.0);
+       printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
+                       map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0);
+
+       return 0;
+}
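
As a usage note: each DMA_MAP_BENCHMARK ioctl runs one complete benchmark, so the tool composes into larger harnesses. A minimal sketch of a thread-count sweep, assuming only the ABI shown above plus a mounted debugfs and the CONFIG_DMA_MAP_BENCHMARK=y fragment:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>

    /* Mirrors the ABI defined in the test above. */
    #define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark)

    struct map_benchmark {
            __u64 avg_map_100ns;
            __u64 map_stddev;
            __u64 avg_unmap_100ns;
            __u64 unmap_stddev;
            __u32 threads;
            __u32 seconds;
            __s32 node;
            __u32 dma_bits;
            __u32 dma_dir;
            __u64 expansion[10];
    };

    int main(void)
    {
            struct map_benchmark map = {
                    .seconds = 5, .node = -1, .dma_bits = 32, .dma_dir = 0,
            };
            int t, fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);

            if (fd == -1) {
                    perror("open"); /* debugfs missing or driver not built */
                    return 1;
            }

            /* Sweep thread counts; results come back in 100ns units. */
            for (t = 1; t <= 8; t *= 2) {
                    map.threads = t;
                    if (ioctl(fd, DMA_MAP_BENCHMARK, &map)) {
                            perror("ioctl");
                            return 1;
                    }
                    printf("threads:%d map:%.1fus unmap:%.1fus\n", t,
                           map.avg_map_100ns / 10.0,
                           map.avg_unmap_100ns / 10.0);
            }
            return 0;
    }
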
index 8f82f99..ad7fabd 100644 (file)
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #define _GNU_SOURCE
+#include <asm/unistd.h>
+#include <linux/time_types.h>
 #include <poll.h>
 #include <unistd.h>
 #include <assert.h>
@@ -21,6 +23,19 @@ struct epoll_mtcontext
        pthread_t waiter;
 };
 
+#ifndef __NR_epoll_pwait2
+#define __NR_epoll_pwait2 -1
+#endif
+
+static inline int sys_epoll_pwait2(int fd, struct epoll_event *events,
+                                  int maxevents,
+                                  const struct __kernel_timespec *timeout,
+                                  const sigset_t *sigset, size_t sigsetsize)
+{
+       return syscall(__NR_epoll_pwait2, fd, events, maxevents, timeout,
+                      sigset, sigsetsize);
+}
+
 static void signal_handler(int signum)
 {
 }
@@ -3377,4 +3392,61 @@ TEST(epoll61)
        close(ctx.evfd);
 }
 
+/* Equivalent to basic test epoll1, but exercising epoll_pwait2. */
+TEST(epoll62)
+{
+       int efd;
+       int sfd[2];
+       struct epoll_event e;
+
+       ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+       efd = epoll_create(1);
+       ASSERT_GE(efd, 0);
+
+       e.events = EPOLLIN;
+       ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+       ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+       EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1);
+       EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1);
+
+       close(efd);
+       close(sfd[0]);
+       close(sfd[1]);
+}
+
+/* Epoll_pwait2 basic timeout test. */
+TEST(epoll63)
+{
+       const int cfg_delay_ms = 10;
+       unsigned long long tdiff;
+       struct __kernel_timespec ts;
+       int efd;
+       int sfd[2];
+       struct epoll_event e;
+
+       ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+       efd = epoll_create(1);
+       ASSERT_GE(efd, 0);
+
+       e.events = EPOLLIN;
+       ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+       ts.tv_sec = 0;
+       ts.tv_nsec = cfg_delay_ms * 1000 * 1000;
+
+       tdiff = msecs();
+       EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, &ts, NULL, 0), 0);
+       tdiff = msecs() - tdiff;
+
+       EXPECT_GE(tdiff, cfg_delay_ms);
+
+       close(efd);
+       close(sfd[0]);
+       close(sfd[1]);
+}
+
 TEST_HARNESS_MAIN
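
epoll_pwait2() takes a relative timeout as a struct __kernel_timespec rather than the millisecond int of epoll_pwait(), which is why epoll63 converts by hand. A hypothetical helper (not part of the patch) that captures that conversion:

    #include <linux/time_types.h>

    /* Hypothetical: millisecond delay to the relative timeout that
     * epoll_pwait2() expects, as epoll63 computes inline above. */
    static struct __kernel_timespec ms_to_kts(long ms)
    {
            struct __kernel_timespec ts = {
                    .tv_sec  = ms / 1000,
                    .tv_nsec = (ms % 1000) * 1000 * 1000,
            };
            return ts;
    }
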
index 31f7c2a..12a7f4c 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0
 # Prefix all lines with "# ", unbuffered. Command being piped in may need
 # to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd".
index 7a2c242..ce8f4ad 100644 (file)
 /x86_64/set_sregs_test
 /x86_64/smm_test
 /x86_64/state_test
-/x86_64/user_msr_test
-/x86_64/vmx_preemption_timer_test
 /x86_64/svm_vmcall_test
 /x86_64/sync_regs_test
+/x86_64/tsc_msrs_test
+/x86_64/userspace_msr_exit_test
 /x86_64/vmx_apic_access_test
 /x86_64/vmx_close_while_nested_test
 /x86_64/vmx_dirty_log_test
+/x86_64/vmx_preemption_timer_test
 /x86_64/vmx_set_nested_state_test
 /x86_64/vmx_tsc_adjust_test
 /x86_64/xss_msr_test
-/clear_dirty_log_test
 /demand_paging_test
 /dirty_log_test
 /dirty_log_perf_test
index 3d14ef7..c7ca4fa 100644 (file)
@@ -36,7 +36,7 @@ endif
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
-LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
+LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
@@ -50,6 +50,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
@@ -58,7 +59,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
 TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
@@ -70,6 +70,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_aarch64 += set_memory_region_test
 TEST_GEN_PROGS_aarch64 += steal_time
index 33218a3..4869321 100644 (file)
 #define for_each_reg(i)                                                                \
        for ((i) = 0; (i) < reg_list->n; ++(i))
 
+#define for_each_reg_filtered(i)                                               \
+       for_each_reg(i)                                                         \
+               if (!filter_reg(reg_list->reg[i]))
+
 #define for_each_missing_reg(i)                                                        \
        for ((i) = 0; (i) < blessed_n; ++(i))                                   \
                if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))
 
 #define for_each_new_reg(i)                                                    \
-       for ((i) = 0; (i) < reg_list->n; ++(i))                                 \
+       for_each_reg_filtered(i)                                                \
                if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
 
 
@@ -57,6 +61,18 @@ static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
 static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
 static __u64 *blessed_reg, blessed_n;
 
+static bool filter_reg(__u64 reg)
+{
+       /*
+        * DEMUX register presence depends on the host's CLIDR_EL1.
+        * This means there's no set of them that we can bless.
+        */
+       if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+               return true;
+
+       return false;
+}
+
 static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
 {
        int i;
@@ -325,7 +341,7 @@ int main(int ac, char **av)
        struct kvm_vcpu_init init = { .target = -1, };
        int new_regs = 0, missing_regs = 0, i;
        int failed_get = 0, failed_set = 0, failed_reject = 0;
-       bool print_list = false, fixup_core_regs = false;
+       bool print_list = false, print_filtered = false, fixup_core_regs = false;
        struct kvm_vm *vm;
        __u64 *vec_regs;
 
@@ -336,8 +352,10 @@ int main(int ac, char **av)
                        fixup_core_regs = true;
                else if (strcmp(av[i], "--list") == 0)
                        print_list = true;
+               else if (strcmp(av[i], "--list-filtered") == 0)
+                       print_filtered = true;
                else
-                       fprintf(stderr, "Ignoring unknown option: %s\n", av[i]);
+                       TEST_FAIL("Unknown option: %s\n", av[i]);
        }
 
        vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
@@ -350,10 +368,14 @@ int main(int ac, char **av)
        if (fixup_core_regs)
                core_reg_fixup();
 
-       if (print_list) {
+       if (print_list || print_filtered) {
                putchar('\n');
-               for_each_reg(i)
-                       print_reg(reg_list->reg[i]);
+               for_each_reg(i) {
+                       __u64 id = reg_list->reg[i];
+                       if ((print_list && !filter_reg(id)) ||
+                           (print_filtered && filter_reg(id)))
+                               print_reg(id);
+               }
                putchar('\n');
                return 0;
        }
@@ -458,6 +480,8 @@ int main(int ac, char **av)
 /*
  * The current blessed list was primed with the output of kernel version
  * v4.15 with --core-reg-fixup and then later updated with new registers.
+ *
+ * The blessed list is up to date with kernel version v5.10-rc5.
  */
 static __u64 base_regs[] = {
        KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -736,9 +760,6 @@ static __u64 base_regs[] = {
        ARM64_SYS_REG(3, 4, 3, 0, 0),   /* DACR32_EL2 */
        ARM64_SYS_REG(3, 4, 5, 0, 1),   /* IFSR32_EL2 */
        ARM64_SYS_REG(3, 4, 5, 3, 0),   /* FPEXC32_EL2 */
-       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0,
-       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1,
-       KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2,
 };
 static __u64 base_regs_n = ARRAY_SIZE(base_regs);
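
The for_each_reg_filtered() macro above works by stacking an if under the for, so the construct still parses as a single statement and the caller's loop body only runs when the filter passes. A standalone toy with the same shape (all names here are illustrative):

    #include <stdio.h>

    /* Same for+if stacking as for_each_reg_filtered()/for_each_new_reg(). */
    #define for_each_item(i, n)   for ((i) = 0; (i) < (n); ++(i))
    #define for_each_even(i, n)   for_each_item(i, n) if (!((i) & 1))

    int main(void)
    {
            int i;

            for_each_even(i, 10)
                    printf("%d\n", i);      /* prints 0 2 4 6 8 */
            return 0;
    }
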
 
index 85c9b8f..9c6a7be 100644 (file)
@@ -27,6 +27,7 @@
 #define TEST_HOST_LOOP_N               2UL
 
 /* Host variables */
+static u64 dirty_log_manual_caps;
 static bool host_quit;
 static uint64_t iteration;
 static uint64_t vcpu_last_completed_iteration[MAX_VCPUS];
@@ -88,10 +89,6 @@ static void *vcpu_worker(void *data)
        return NULL;
 }
 
-#ifdef USE_CLEAR_DIRTY_LOG
-static u64 dirty_log_manual_caps;
-#endif
-
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                     uint64_t phys_offset, int wr_fract)
 {
@@ -106,10 +103,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        struct timespec get_dirty_log_total = (struct timespec){0};
        struct timespec vcpu_dirty_total = (struct timespec){0};
        struct timespec avg;
-#ifdef USE_CLEAR_DIRTY_LOG
        struct kvm_enable_cap cap = {};
        struct timespec clear_dirty_log_total = (struct timespec){0};
-#endif
 
        vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
 
@@ -120,11 +115,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        host_num_pages = vm_num_host_pages(mode, guest_num_pages);
        bmap = bitmap_alloc(host_num_pages);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-       cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
-       cap.args[0] = dirty_log_manual_caps;
-       vm_enable_cap(vm, &cap);
-#endif
+       if (dirty_log_manual_caps) {
+               cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+               cap.args[0] = dirty_log_manual_caps;
+               vm_enable_cap(vm, &cap);
+       }
 
        vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
        TEST_ASSERT(vcpu_threads, "Memory allocation failed");
@@ -190,17 +185,17 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
                        iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-               clock_gettime(CLOCK_MONOTONIC, &start);
-               kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
-                                      host_num_pages);
+               if (dirty_log_manual_caps) {
+                       clock_gettime(CLOCK_MONOTONIC, &start);
+                       kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+                                              host_num_pages);
 
-               ts_diff = timespec_diff_now(start);
-               clear_dirty_log_total = timespec_add(clear_dirty_log_total,
-                                                    ts_diff);
-               pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
-                       iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
-#endif
+                       ts_diff = timespec_diff_now(start);
+                       clear_dirty_log_total = timespec_add(clear_dirty_log_total,
+                                                            ts_diff);
+                       pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
+                               iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+               }
        }
 
        /* Tell the vcpu thread to quit */
@@ -220,12 +215,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                iterations, get_dirty_log_total.tv_sec,
                get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-       avg = timespec_div(clear_dirty_log_total, iterations);
-       pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
-               iterations, clear_dirty_log_total.tv_sec,
-               clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
-#endif
+       if (dirty_log_manual_caps) {
+               avg = timespec_div(clear_dirty_log_total, iterations);
+               pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+                       iterations, clear_dirty_log_total.tv_sec,
+                       clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+       }
 
        free(bmap);
        free(vcpu_threads);
@@ -284,16 +279,10 @@ int main(int argc, char *argv[])
        int opt, i;
        int wr_fract = 1;
 
-#ifdef USE_CLEAR_DIRTY_LOG
        dirty_log_manual_caps =
                kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
-       if (!dirty_log_manual_caps) {
-               print_skip("KVM_CLEAR_DIRTY_LOG not available");
-               exit(KSFT_SKIP);
-       }
        dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
                                  KVM_DIRTY_LOG_INITIALLY_SET);
-#endif
 
 #ifdef __x86_64__
        guest_mode_init(VM_MODE_PXXV48_4K, true, true);
index 54da9cc..471baec 100644 (file)
 #include <unistd.h>
 #include <time.h>
 #include <pthread.h>
+#include <semaphore.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
+#include <asm/barrier.h>
 
 #include "test_util.h"
 #include "kvm_util.h"
 # define test_and_clear_bit_le test_and_clear_bit
 #endif
 
+#define TEST_DIRTY_RING_COUNT          65536
+
+#define SIG_IPI SIGUSR1
+
 /*
  * Guest/Host shared variables. Ensure addr_gva2hva() and/or
  * sync_global_to/from_guest() are used when accessing from
@@ -128,6 +137,31 @@ static uint64_t host_dirty_count;
 static uint64_t host_clear_count;
 static uint64_t host_track_next_count;
 
+/* Whether dirty ring reset is requested, or finished */
+static sem_t dirty_ring_vcpu_stop;
+static sem_t dirty_ring_vcpu_cont;
+/*
+ * This is updated by the vcpu thread to tell the host whether it's a
+ * ring-full event.  It should only be read after a sem_wait() on
+ * dirty_ring_vcpu_stop and before the vcpu continues to run.
+ */
+static bool dirty_ring_vcpu_ring_full;
+/*
+ * This is only used for verifying the dirty pages.  Dirty ring has a very
+ * tricky case when the ring just got full, kvm will do userspace exit due to
+ * ring full.  When that happens, the very last PFN is set but actually the
+ * data is not changed (the guest WRITE is not really applied yet), because
+ * we found that the dirty ring is full, refused to continue the vcpu, and
+ * recorded the dirty gfn with the old contents.
+ *
+ * For this specific case, it's safe to skip checking this pfn for this
+ * bit, because it's a redundant bit, and when the write happens later the bit
+ * will be set again.  We use this variable to always keep track of the latest
+ * dirty gfn we've collected, so that if a mismatch of data found later in the
+ * verifying process, we let it pass.
+ */
+static uint64_t dirty_ring_last_page;
+
 enum log_mode_t {
        /* Only use KVM_GET_DIRTY_LOG for logging */
        LOG_MODE_DIRTY_LOG = 0,
@@ -135,6 +169,9 @@ enum log_mode_t {
        /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
        LOG_MODE_CLEAR_LOG = 1,
 
+       /* Use dirty ring for logging */
+       LOG_MODE_DIRTY_RING = 2,
+
        LOG_MODE_NUM,
 
        /* Run all supported modes */
@@ -145,6 +182,26 @@ enum log_mode_t {
 static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
 /* Logging mode for current run */
 static enum log_mode_t host_log_mode;
+static pthread_t vcpu_thread;
+static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
+
+static void vcpu_kick(void)
+{
+       pthread_kill(vcpu_thread, SIG_IPI);
+}
+
+/*
+ * Our test plays signal tricks, so use a sem_wait() variant that
+ * retries instead of failing when interrupted by a signal.
+ */
+static void sem_wait_until(sem_t *sem)
+{
+       int ret;
+
+       do
+               ret = sem_wait(sem);
+       while (ret == -1 && errno == EINTR);
+}
 
 static bool clear_log_supported(void)
 {
@@ -178,6 +235,152 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
        kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
 }
 
+static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+       TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
+                   "vcpu run failed: errno=%d", err);
+
+       TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+                   "Invalid guest sync status: exit_reason=%s\n",
+                   exit_reason_str(run->exit_reason));
+}
+
+static bool dirty_ring_supported(void)
+{
+       return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING);
+}
+
+static void dirty_ring_create_vm_done(struct kvm_vm *vm)
+{
+       /*
+        * Switch to dirty ring mode after VM creation but before any
+        * of the vcpu creation.
+        */
+       vm_enable_dirty_ring(vm, test_dirty_ring_count *
+                            sizeof(struct kvm_dirty_gfn));
+}
+
+static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
+{
+       return gfn->flags == KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
+{
+       gfn->flags = KVM_DIRTY_GFN_F_RESET;
+}
+
+static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
+                                      int slot, void *bitmap,
+                                      uint32_t num_pages, uint32_t *fetch_index)
+{
+       struct kvm_dirty_gfn *cur;
+       uint32_t count = 0;
+
+       while (true) {
+               cur = &dirty_gfns[*fetch_index % test_dirty_ring_count];
+               if (!dirty_gfn_is_dirtied(cur))
+                       break;
+               TEST_ASSERT(cur->slot == slot, "Slot number didn't match: "
+                           "%u != %u", cur->slot, slot);
+               TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
+                           "0x%llx >= 0x%x", cur->offset, num_pages);
+               //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
+               set_bit_le(cur->offset, bitmap);
+               dirty_ring_last_page = cur->offset;
+               dirty_gfn_set_collected(cur);
+               (*fetch_index)++;
+               count++;
+       }
+
+       return count;
+}
+
+static void dirty_ring_wait_vcpu(void)
+{
+       /* This makes sure the hardware PML cache is flushed */
+       vcpu_kick();
+       sem_wait_until(&dirty_ring_vcpu_stop);
+}
+
+static void dirty_ring_continue_vcpu(void)
+{
+       pr_info("Notifying vcpu to continue\n");
+       sem_post(&dirty_ring_vcpu_cont);
+}
+
+static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
+                                          void *bitmap, uint32_t num_pages)
+{
+       /* We only have one vcpu */
+       static uint32_t fetch_index = 0;
+       uint32_t count = 0, cleared;
+       bool continued_vcpu = false;
+
+       dirty_ring_wait_vcpu();
+
+       if (!dirty_ring_vcpu_ring_full) {
+               /*
+                * This is not a ring-full event, it's safe to allow
+                * vcpu to continue
+                */
+               dirty_ring_continue_vcpu();
+               continued_vcpu = true;
+       }
+
+       /* Only have one vcpu */
+       count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID),
+                                      slot, bitmap, num_pages, &fetch_index);
+
+       cleared = kvm_vm_reset_dirty_ring(vm);
+
+       /* Cleared pages should be the same as collected */
+       TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
+                   "with collected (%u)", cleared, count);
+
+       if (!continued_vcpu) {
+               TEST_ASSERT(dirty_ring_vcpu_ring_full,
+                           "Didn't continue vcpu even without ring full");
+               dirty_ring_continue_vcpu();
+       }
+
+       pr_info("Iteration %ld collected %u pages\n", iteration, count);
+}
+
+static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+       /* A ucall-sync or ring-full event is allowed */
+       if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
+               /* We should allow this to continue */
+               ;
+       } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
+                  (ret == -1 && err == EINTR)) {
+               /* Update the flag first before pause */
+               WRITE_ONCE(dirty_ring_vcpu_ring_full,
+                          run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
+               sem_post(&dirty_ring_vcpu_stop);
+               pr_info("vcpu stops because %s...\n",
+                       dirty_ring_vcpu_ring_full ?
+                       "dirty ring is full" : "vcpu is kicked out");
+               sem_wait_until(&dirty_ring_vcpu_cont);
+               pr_info("vcpu continues now.\n");
+       } else {
+               TEST_ASSERT(false, "Invalid guest sync status: "
+                           "exit_reason=%s\n",
+                           exit_reason_str(run->exit_reason));
+       }
+}
+
+static void dirty_ring_before_vcpu_join(void)
+{
+       /* Post the continue semaphore once more to make sure the vcpu quits */
+       sem_post(&dirty_ring_vcpu_cont);
+}
+
 struct log_mode {
        const char *name;
        /* Return true if this mode is supported, otherwise false */
@@ -187,16 +390,29 @@ struct log_mode {
        /* Hook to collect the dirty pages into the bitmap provided */
        void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
                                     void *bitmap, uint32_t num_pages);
+       /* Hook to call after each vcpu run */
+       void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err);
+       void (*before_vcpu_join) (void);
 } log_modes[LOG_MODE_NUM] = {
        {
                .name = "dirty-log",
                .collect_dirty_pages = dirty_log_collect_dirty_pages,
+               .after_vcpu_run = default_after_vcpu_run,
        },
        {
                .name = "clear-log",
                .supported = clear_log_supported,
                .create_vm_done = clear_log_create_vm_done,
                .collect_dirty_pages = clear_log_collect_dirty_pages,
+               .after_vcpu_run = default_after_vcpu_run,
+       },
+       {
+               .name = "dirty-ring",
+               .supported = dirty_ring_supported,
+               .create_vm_done = dirty_ring_create_vm_done,
+               .collect_dirty_pages = dirty_ring_collect_dirty_pages,
+               .before_vcpu_join = dirty_ring_before_vcpu_join,
+               .after_vcpu_run = dirty_ring_after_vcpu_run,
        },
 };
 
@@ -247,6 +463,22 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
        mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
 }
 
+static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+       struct log_mode *mode = &log_modes[host_log_mode];
+
+       if (mode->after_vcpu_run)
+               mode->after_vcpu_run(vm, ret, err);
+}
+
+static void log_mode_before_vcpu_join(void)
+{
+       struct log_mode *mode = &log_modes[host_log_mode];
+
+       if (mode->before_vcpu_join)
+               mode->before_vcpu_join();
+}
+
 static void generate_random_array(uint64_t *guest_array, uint64_t size)
 {
        uint64_t i;
@@ -257,29 +489,44 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size)
 
 static void *vcpu_worker(void *data)
 {
-       int ret;
+       int ret, vcpu_fd;
        struct kvm_vm *vm = data;
        uint64_t *guest_array;
        uint64_t pages_count = 0;
-       struct kvm_run *run;
+       struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
+                                                + sizeof(sigset_t));
+       sigset_t *sigset = (sigset_t *) &sigmask->sigset;
+
+       vcpu_fd = vcpu_get_fd(vm, VCPU_ID);
+
+       /*
+        * SIG_IPI is unblocked atomically while in KVM_RUN.  It causes the
+        * ioctl to return with -EINTR, but it is still pending and we need
+        * to accept it with the sigwait.
+        */
+       sigmask->len = 8;
+       pthread_sigmask(0, NULL, sigset);
+       vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask);
+       sigaddset(sigset, SIG_IPI);
+       pthread_sigmask(SIG_BLOCK, sigset, NULL);
 
-       run = vcpu_state(vm, VCPU_ID);
+       sigemptyset(sigset);
+       sigaddset(sigset, SIG_IPI);
 
        guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
-       generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
 
        while (!READ_ONCE(host_quit)) {
+               /* Clear any existing kick signals */
+               generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+               pages_count += TEST_PAGES_PER_LOOP;
                /* Let the guest dirty the random pages */
-               ret = _vcpu_run(vm, VCPU_ID);
-               TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
-               if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
-                       pages_count += TEST_PAGES_PER_LOOP;
-                       generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
-               } else {
-                       TEST_FAIL("Invalid guest sync status: "
-                                 "exit_reason=%s\n",
-                                 exit_reason_str(run->exit_reason));
+               ret = ioctl(vcpu_fd, KVM_RUN, NULL);
+               if (ret == -1 && errno == EINTR) {
+                       int sig = -1;
+                       sigwait(sigset, &sig);
+                       assert(sig == SIG_IPI);
                }
+               log_mode_after_vcpu_run(vm, ret, errno);
        }
 
        pr_info("Dirtied %"PRIu64" pages\n", pages_count);
@@ -292,6 +539,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
        uint64_t step = vm_num_host_pages(mode, 1);
        uint64_t page;
        uint64_t *value_ptr;
+       uint64_t min_iter = 0;
 
        for (page = 0; page < host_num_pages; page += step) {
                value_ptr = host_test_mem + page * host_page_size;
@@ -306,14 +554,64 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
                }
 
                if (test_and_clear_bit_le(page, bmap)) {
+                       bool matched;
+
                        host_dirty_count++;
+
                        /*
                         * If the bit is set, the value written onto
                         * the corresponding page should be either the
                         * previous iteration number or the current one.
                         */
-                       TEST_ASSERT(*value_ptr == iteration ||
-                                   *value_ptr == iteration - 1,
+                       matched = (*value_ptr == iteration ||
+                                  *value_ptr == iteration - 1);
+
+                       if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
+                               if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
+                                        * Short answer: this case arises
+                                        * only in the dirty-ring test, when
+                                        * the page is the last one written
+                                        * before a ring-full exit in N-2.
+                                        * dirty ring full in iteration N-2.
+                                        *
+                                        * Long answer: Assuming ring size R,
+                                        * one possible condition is:
+                                        *
+                                        *      main thr       vcpu thr
+                                        *      --------       --------
+                                        *    iter=1
+                                        *                   write 1 to page 0~(R-1)
+                                        *                   full, vmexit
+                                        *    collect 0~(R-1)
+                                        *    kick vcpu
+                                        *                   write 1 to (R-1)~(2R-2)
+                                        *                   full, vmexit
+                                        *    iter=2
+                                        *    collect (R-1)~(2R-2)
+                                        *    kick vcpu
+                                        *                   write 1 to (2R-2)
+                                        *                   (NOTE!!! "1" cached in cpu reg)
+                                        *                   write 2 to (2R-1)~(3R-3)
+                                        *                   full, vmexit
+                                        *    iter=3
+                                        *    collect (2R-2)~(3R-3)
+                                        *    (at this point page "2R-2"
+                                        *     reads 1, even though iter=3!)
+                                        *
+                                        * This however can only happen once per iteration.
+                                        */
+                                       min_iter = iteration - 1;
+                                       continue;
+                               } else if (page == dirty_ring_last_page) {
+                                       /*
+                                        * Please refer to comments in
+                                        * dirty_ring_last_page.
+                                        */
+                                       continue;
+                               }
+                       }
+
+                       TEST_ASSERT(matched,
                                    "Set page %"PRIu64" value %"PRIu64
                                    " incorrect (iteration=%"PRIu64")",
                                    page, *value_ptr, iteration);
@@ -378,7 +676,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
                     unsigned long interval, uint64_t phys_offset)
 {
-       pthread_t vcpu_thread;
        struct kvm_vm *vm;
        unsigned long *bmap;
 
@@ -443,9 +740,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
        /* Cache the HVA pointer of the region */
        host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
 
-#ifdef __x86_64__
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-#endif
        ucall_init(vm, NULL);
 
        /* Export the shared variables to the guest */
@@ -476,6 +770,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 
        /* Tell the vcpu thread to quit */
        host_quit = true;
+       log_mode_before_vcpu_join();
        pthread_join(vcpu_thread, NULL);
 
        pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
@@ -506,6 +801,9 @@ static void help(char *name)
        printf("usage: %s [-h] [-i iterations] [-I interval] "
               "[-p offset] [-m mode]\n", name);
        puts("");
+       printf(" -c: specify dirty ring size, in number of entries\n");
+       printf("     (only useful for dirty-ring test; default: %"PRIu32")\n",
+              TEST_DIRTY_RING_COUNT);
        printf(" -i: specify iteration counts (default: %"PRIu64")\n",
               TEST_HOST_LOOP_N);
        printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
@@ -536,6 +834,9 @@ int main(int argc, char *argv[])
        unsigned int mode;
        int opt, i, j;
 
+       sem_init(&dirty_ring_vcpu_stop, 0, 0);
+       sem_init(&dirty_ring_vcpu_cont, 0, 0);
+
 #ifdef __x86_64__
        guest_mode_init(VM_MODE_PXXV48_4K, true, true);
 #endif
@@ -558,8 +859,11 @@ int main(int argc, char *argv[])
        guest_mode_init(VM_MODE_P40V48_4K, true, true);
 #endif
 
-       while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
+       while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) {
                switch (opt) {
+               case 'c':
+                       test_dirty_ring_count = strtol(optarg, NULL, 10);
+                       break;
                case 'i':
                        iterations = strtol(optarg, NULL, 10);
                        break;
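
The collection path in this file reduces to a small contract on struct kvm_dirty_gfn: KVM publishes entries flagged DIRTY, userspace harvests them in fetch order, flips each to RESET, and finally recycles everything with one KVM_RESET_DIRTY_RINGS ioctl. A condensed sketch of the consumer side, assuming the uapi definitions in <linux/kvm.h> and a single memslot as in the test:

    #include <stdint.h>
    #include <linux/kvm.h>

    /* Condensed version of dirty_ring_collect_one() above; the slot field
     * is ignored here because the test dirties only one memslot. */
    static uint32_t harvest(struct kvm_dirty_gfn *ring, uint32_t nents,
                            uint32_t *fetch_index, uint64_t *bitmap)
    {
            struct kvm_dirty_gfn *e;
            uint32_t count = 0;

            for (;;) {
                    e = &ring[*fetch_index % nents];
                    if (e->flags != KVM_DIRTY_GFN_F_DIRTY)
                            break;                    /* nothing more published */
                    bitmap[e->offset / 64] |= 1ull << (e->offset % 64);
                    e->flags = KVM_DIRTY_GFN_F_RESET; /* hand entry back to KVM */
                    (*fetch_index)++;
                    count++;
            }
            /* The caller then issues KVM_RESET_DIRTY_RINGS on the VM fd. */
            return count;
    }
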
index 7d29aa7..dfa9d36 100644 (file)
@@ -45,13 +45,28 @@ enum vm_guest_mode {
 };
 
 #if defined(__aarch64__)
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+
+#define VM_MODE_DEFAULT                        VM_MODE_P40V48_4K
+#define MIN_PAGE_SHIFT                 12U
+#define ptes_per_page(page_size)       ((page_size) / 8)
+
 #elif defined(__x86_64__)
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
-#else
-#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
+
+#define VM_MODE_DEFAULT                        VM_MODE_PXXV48_4K
+#define MIN_PAGE_SHIFT                 12U
+#define ptes_per_page(page_size)       ((page_size) / 8)
+
+#elif defined(__s390x__)
+
+#define VM_MODE_DEFAULT                        VM_MODE_P52V48_4K
+#define MIN_PAGE_SHIFT                 12U
+#define ptes_per_page(page_size)       ((page_size) / 16)
+
 #endif
 
+#define MIN_PAGE_SIZE          (1U << MIN_PAGE_SHIFT)
+#define PTES_PER_MIN_PAGE      ptes_per_page(MIN_PAGE_SIZE)
+
 #define vm_guest_mode_string(m) vm_guest_mode_string[m]
 extern const char * const vm_guest_mode_string[];
 
@@ -74,6 +89,7 @@ void kvm_vm_release(struct kvm_vm *vmp);
 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
 void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
                            uint64_t first_page, uint32_t num_pages);
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
 
 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
                       size_t len);
@@ -114,6 +130,8 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
 int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
                void *arg);
 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
@@ -146,6 +164,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
 struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
 int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
                          struct kvm_guest_debug *debug);
@@ -199,6 +218,7 @@ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
 int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
                          struct kvm_nested_state *state, bool ignore_error);
 #endif
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
 
 const char *exit_reason_str(unsigned int exit_reason);
 
@@ -246,6 +266,16 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
 struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
                                 void *guest_code);
 
+/* Same as vm_create_default, but can be used for more than one vcpu */
+struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
+                                           uint32_t num_percpu_pages, void *guest_code,
+                                           uint32_t vcpuids[]);
+
+/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */
+struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+                                   uint64_t extra_mem_pages, uint32_t num_percpu_pages,
+                                   void *guest_code, uint32_t vcpuids[]);
+
 /*
  * Adds a vCPU with reasonable defaults (e.g. a stack)
  *
index 2618052..239421e 100644 (file)
@@ -179,10 +179,6 @@ static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
 
                vm_vcpu_add_default(vm, vcpu_id, guest_code);
 
-#ifdef __x86_64__
-               vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
-#endif
-
                vcpu_args->vcpu_id = vcpu_id;
                vcpu_args->gva = guest_test_virt_mem +
                                 (vcpu_id * vcpu_memory_bytes);
diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
new file mode 100644 (file)
index 0000000..b0ed713
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
+#define SELFTEST_KVM_DIAG318_TEST_HANDLER
+
+uint64_t get_diag318_info(void);
+
+#endif
index 8e61340..90cd598 100644 (file)
@@ -27,6 +27,7 @@
 #define X86_CR4_OSFXSR         (1ul << 9)
 #define X86_CR4_OSXMMEXCPT     (1ul << 10)
 #define X86_CR4_UMIP           (1ul << 11)
+#define X86_CR4_LA57           (1ul << 12)
 #define X86_CR4_VMXE           (1ul << 13)
 #define X86_CR4_SMXE           (1ul << 14)
 #define X86_CR4_FSGSBASE       (1ul << 16)
 #define X86_CR4_SMAP           (1ul << 21)
 #define X86_CR4_PKE            (1ul << 22)
 
+/* CPUID.1.ECX */
+#define CPUID_VMX              (1ul << 5)
+#define CPUID_SMX              (1ul << 6)
+#define CPUID_PCID             (1ul << 17)
+#define CPUID_XSAVE            (1ul << 26)
+
+/* CPUID.7.EBX */
+#define CPUID_FSGSBASE         (1ul << 0)
+#define CPUID_SMEP             (1ul << 7)
+#define CPUID_SMAP             (1ul << 20)
+
+/* CPUID.7.ECX */
+#define CPUID_UMIP             (1ul << 2)
+#define CPUID_PKU              (1ul << 3)
+#define CPUID_LA57             (1ul << 16)
+
 #define UNEXPECTED_VECTOR_PORT 0xfff0u
 
 /* General Registers in 64-Bit Mode */
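
Centralizing these masks pays off because CR4 bits and CPUID feature bits are usually checked in pairs. An illustrative pairing table using only the names visible above (the table itself is not a structure from the selftests):

    #include <stdint.h>

    /* Illustration: a CR4 bit is only legal when the matching CPUID
     * feature bit is present. Masks are the ones defined above. */
    struct cr4_cpuid_pair {
            uint64_t cr4_bit;
            uint32_t cpuid_bit;
    };

    static const struct cr4_cpuid_pair pairs[] = {
            { X86_CR4_UMIP,     CPUID_UMIP },     /* CPUID.7.ECX */
            { X86_CR4_LA57,     CPUID_LA57 },     /* CPUID.7.ECX */
            { X86_CR4_VMXE,     CPUID_VMX },      /* CPUID.1.ECX */
            { X86_CR4_FSGSBASE, CPUID_FSGSBASE }, /* CPUID.7.EBX */
            { X86_CR4_SMAP,     CPUID_SMAP },     /* CPUID.7.EBX */
            { X86_CR4_PKE,      CPUID_PKU },      /* CPUID.7.ECX */
    };
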
index e78d7e2..65eb107 100644 (file)
 #include <stdint.h>
 #include "processor.h"
 
-#define CPUID_VMX_BIT                          5
-
-#define CPUID_VMX                              (1 << 5)
-
 /*
  * Definitions of Primary Processor-Based VM-Execution Controls.
  */
index d6c32c3..cee92d4 100644 (file)
@@ -5,8 +5,6 @@
  * Copyright (C) 2018, Red Hat, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include <linux/compiler.h>
 
 #include "kvm_util.h"
@@ -219,21 +217,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
        }
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
-                                void *guest_code)
-{
-       uint64_t ptrs_per_4k_pte = 512;
-       uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
-       struct kvm_vm *vm;
-
-       vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-
-       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-       vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-       return vm;
-}
-
 void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
 {
        struct kvm_vcpu_init default_init = { .target = -1, };
index 126c672..88ef706 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (C) 2018, Google LLC.
  */
 
+#define _GNU_SOURCE /* for program_invocation_name */
 #include "test_util.h"
 #include "kvm_util.h"
 #include "kvm_util_internal.h"
@@ -114,6 +115,16 @@ int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
        return r;
 }
 
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
+{
+       struct kvm_enable_cap cap = { 0 };
+
+       cap.cap = KVM_CAP_DIRTY_LOG_RING;
+       cap.args[0] = ring_size;
+       vm_enable_cap(vm, &cap);
+       vm->dirty_ring_size = ring_size;
+}
+
 static void vm_open(struct kvm_vm *vm, int perm)
 {
        vm->kvm_fd = open(KVM_DEV_PATH, perm);
@@ -271,6 +282,63 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
        return vm;
 }
 
+struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+                                   uint64_t extra_mem_pages, uint32_t num_percpu_pages,
+                                   void *guest_code, uint32_t vcpuids[])
+{
+       /* The maximum page table size for a memory region will be when the
+        * smallest pages are used. Considering each page contains x page
+        * table descriptors, the total extra size for page tables (for extra
+        * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
+        * than N/x*2.
+        */
+       uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
+       uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
+       uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
+       struct kvm_vm *vm;
+       int i;
+
+       TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
+                   "nr_vcpus = %d too large for host, max-vcpus = %d",
+                   nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
+
+       pages = vm_adjust_num_guest_pages(mode, pages);
+       vm = vm_create(mode, pages, O_RDWR);
+
+       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+
+#ifdef __x86_64__
+       vm_create_irqchip(vm);
+#endif
+
+       for (i = 0; i < nr_vcpus; ++i) {
+               uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
+
+               vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+#ifdef __x86_64__
+               vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
+#endif
+       }
+
+       return vm;
+}
+
+struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
+                                           uint32_t num_percpu_pages, void *guest_code,
+                                           uint32_t vcpuids[])
+{
+       return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages,
+                                   num_percpu_pages, guest_code, vcpuids);
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+                                void *guest_code)
+{
+       return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
+                                           (uint32_t []){ vcpuid });
+}
+
 /*
  * VM Restart
  *
@@ -328,6 +396,11 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
                    __func__, strerror(-ret));
 }
 
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+{
+       return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
+}
+
 /*
  * Userspace Memory Region Find
  *
@@ -432,10 +505,17 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
  *
  * Removes a vCPU from a VM and frees its resources.
  */
-static void vm_vcpu_rm(struct vcpu *vcpu)
+static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
 {
        int ret;
 
+       if (vcpu->dirty_gfns) {
+               ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
+               TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
+                           "rc: %i errno: %i", ret, errno);
+               vcpu->dirty_gfns = NULL;
+       }
+
        ret = munmap(vcpu->state, sizeof(*vcpu->state));
        TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
                "errno: %i", ret, errno);
@@ -453,7 +533,7 @@ void kvm_vm_release(struct kvm_vm *vmp)
        int ret;
 
        list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
-               vm_vcpu_rm(vcpu);
+               vm_vcpu_rm(vmp, vcpu);
 
        ret = close(vmp->fd);
        TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
@@ -1233,6 +1313,15 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
        return rc;
 }
 
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
+{
+       struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+       TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+       return vcpu->fd;
+}
+
 void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
 {
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
@@ -1561,6 +1650,42 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
        return ret;
 }
 
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
+{
+       struct vcpu *vcpu;
+       uint32_t size = vm->dirty_ring_size;
+
+       TEST_ASSERT(size > 0, "Should enable dirty ring first");
+
+       vcpu = vcpu_find(vm, vcpuid);
+
+       TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
+
+       if (!vcpu->dirty_gfns) {
+               void *addr;
+
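+               /*
+                * The first two mmap() probes below are expected to fail:
+                * the dirty ring refuses private and executable mappings,
+                * and only the final shared read-write mapping succeeds.
+                */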
+               addr = mmap(NULL, size, PROT_READ,
+                           MAP_PRIVATE, vcpu->fd,
+                           vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+               TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
+
+               addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
+                           MAP_PRIVATE, vcpu->fd,
+                           vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+               TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
+
+               addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+                           MAP_SHARED, vcpu->fd,
+                           vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+               TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+
+               vcpu->dirty_gfns = addr;
+               vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
+       }
+
+       return vcpu->dirty_gfns;
+}
+
 /*
  * VM Ioctl
  *
@@ -1583,6 +1708,32 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
 }
 
 /*
+ * KVM system ioctl
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cmd - Ioctl number
+ *   arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a KVM fd.
+ */
+void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+       int ret;
+
+       ret = ioctl(vm->kvm_fd, cmd, arg);
+       TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
+               cmd, ret, errno, strerror(errno));
+}
+
+int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+       return ioctl(vm->kvm_fd, cmd, arg);
+}
+
+/*
  * VM Dump
  *
  * Input Args:
@@ -1654,6 +1805,9 @@ static struct exit_reason {
        {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
        {KVM_EXIT_OSI, "OSI"},
        {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+       {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
+       {KVM_EXIT_X86_RDMSR, "RDMSR"},
+       {KVM_EXIT_X86_WRMSR, "WRMSR"},
 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
        {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
 #endif
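
Two notes on the additions above. The page-table sizing comment in vm_create_with_vcpus() is a geometric-series bound: with x descriptors per page-table page, N data pages need N/x + N/x^2 + N/x^3 + ... = N/(x-1) extra pages, and N/(x-1) <= 2N/x whenever x >= 2, which is the N/x*2 the code reserves. And since the dirty ring must be enabled after VM creation but before any vcpu exists (see dirty_ring_create_vm_done() in the dirty_log_test above), a caller cannot use the all-in-one vm_create_with_vcpus() for it; a hedged ordering sketch, where guest_code and the 4096-entry ring size are assumptions:

    #include <fcntl.h>
    #include "kvm_util.h"

    /* Sketch only: the enable call must sit between vm_create() and the
     * first vm_vcpu_add_default(), per dirty_ring_create_vm_done(). */
    static void setup_dirty_ring_vm(void *guest_code)
    {
            struct kvm_dirty_gfn *ring;
            struct kvm_vm *vm;

            vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
            vm_enable_dirty_ring(vm, 4096 * sizeof(struct kvm_dirty_gfn));
            vm_vcpu_add_default(vm, 0, guest_code);
            ring = vcpu_map_dirty_ring(vm, 0);  /* mmap of the ring pages */
            (void)ring;
    }
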
index f07d383..34465dc 100644 (file)
@@ -28,6 +28,9 @@ struct vcpu {
        uint32_t id;
        int fd;
        struct kvm_run *state;
+       struct kvm_dirty_gfn *dirty_gfns;
+       uint32_t fetch_index;
+       uint32_t dirty_gfns_count;
 };
 
 struct kvm_vm {
@@ -52,6 +55,7 @@ struct kvm_vm {
        vm_vaddr_t tss;
        vm_vaddr_t idt;
        vm_vaddr_t handlers;
+       uint32_t dirty_ring_size;
 };
 
 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
new file mode 100644 (file)
index 0000000..86b9e61
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define VCPU_ID        6
+
+#define ICPT_INSTRUCTION       0x04
+#define IPA0_DIAG              0x8300
+
+static void guest_code(void)
+{
+       uint64_t diag318_info = 0x12345678;
+
+       asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
+}
+
+/*
+ * The DIAGNOSE 0x0318 instruction must be handled in userspace. As such,
+ * we create an ad-hoc VM here to handle the instruction and then extract the
+ * necessary data. It is up to the caller to decide what to do with that data.
+ */
+static uint64_t diag318_handler(void)
+{
+       struct kvm_vm *vm;
+       struct kvm_run *run;
+       uint64_t reg;
+       uint64_t diag318_info;
+
+       vm = vm_create_default(VCPU_ID, 0, guest_code);
+       vcpu_run(vm, VCPU_ID);
+       run = vcpu_state(vm, VCPU_ID);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+                   "DIAGNOSE 0x0318 instruction was not intercepted");
+       TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
+                   "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
+       TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
+                   "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
+
+       reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
+       diag318_info = run->s.regs.gprs[reg];
+
+       TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
+
+       kvm_vm_free(vm);
+
+       return diag318_info;
+}
+
+uint64_t get_diag318_info(void)
+{
+       static uint64_t diag318_info;
+       static bool printed_skip;
+
+       /*
+        * If KVM does not support diag318, then return 0 to
+        * ensure tests do not break.
+        */
+       if (!kvm_check_cap(KVM_CAP_S390_DIAG318)) {
+               if (!printed_skip) {
+                       fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
+                               "Skipping diag318 test.\n");
+                       printed_skip = true;
+               }
+               return 0;
+       }
+
+       /*
+        * If a test has previously requested the diag318 info,
+        * then don't bother spinning up a temporary VM again.
+        */
+       if (!diag318_info)
+               diag318_info = diag318_handler();
+
+       return diag318_info;
+}
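
Callers treat a zero return as "capability absent". A sketch of the consumer side, matching what the sync_regs_test change later in this patch does:

    /* Sketch: run is the vcpu's struct kvm_run, as in sync_regs_test. */
    uint64_t info = get_diag318_info();

    if (info) {
            run->s.regs.diag318 = info;
            run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
    }
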
index 7349bb2..0152f35 100644 (file)
@@ -5,8 +5,6 @@
  * Copyright (C) 2019, Red Hat, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include "processor.h"
 #include "kvm_util.h"
 #include "../kvm_util_internal.h"
@@ -160,26 +158,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
        virt_dump_region(stream, vm, indent, vm->pgd);
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
-                                void *guest_code)
-{
-       /*
-        * The additional amount of pages required for the page tables is:
-        * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ...
-        * which is definitely smaller than (n / 256) * 2.
-        */
-       uint64_t extra_pg_pages = extra_mem_pages / 256 * 2;
-       struct kvm_vm *vm;
-
-       vm = vm_create(VM_MODE_DEFAULT,
-                      DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-
-       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-       vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-       return vm;
-}
-
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 {
        size_t stack_size =  DEFAULT_STACK_PGS * getpagesize();
index d10c5c0..95e1a75 100644 (file)
@@ -5,8 +5,6 @@
  * Copyright (C) 2018, Google LLC.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include "test_util.h"
 #include "kvm_util.h"
 #include "../kvm_util_internal.h"
@@ -731,36 +729,6 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
 
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
-                                void *guest_code)
-{
-       struct kvm_vm *vm;
-       /*
-        * For x86 the maximum page table size for a memory region
-        * will be when only 4K pages are used.  In that case the
-        * total extra size for page tables (for extra N pages) will
-        * be: N/512+N/512^2+N/512^3+... which is definitely smaller
-        * than N/512*2.
-        */
-       uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
-
-       /* Create VM */
-       vm = vm_create(VM_MODE_DEFAULT,
-                      DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
-                      O_RDWR);
-
-       /* Setup guest code */
-       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-
-       /* Setup IRQ Chip */
-       vm_create_irqchip(vm);
-
-       /* Add the first vCPU. */
-       vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-       return vm;
-}
-
 /*
  * VCPU Get MSR
  *
index 5731ccf..caf7b88 100644 (file)
@@ -20,6 +20,7 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
+#include "diag318_test_handler.h"
 
 #define VCPU_ID 5
 
@@ -70,7 +71,7 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
 
 #undef REG_COMPARE
 
-#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS)
+#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
 #define INVALID_SYNC_FIELD 0x80000000
 
 int main(int argc, char *argv[])
@@ -152,6 +153,12 @@ int main(int argc, char *argv[])
 
        run->kvm_valid_regs = TEST_SYNC_FIELDS;
        run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
+
+       if (get_diag318_info() > 0) {
+               run->s.regs.diag318 = get_diag318_info();
+               run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
+       }
+
        rv = _vcpu_run(vm, VCPU_ID);
        TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
        TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
@@ -164,6 +171,9 @@ int main(int argc, char *argv[])
        TEST_ASSERT(run->s.regs.acrs[0]  == 1 << 11,
                    "acr0 sync regs value incorrect 0x%x.",
                    run->s.regs.acrs[0]);
+       TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(),
+                   "diag318 sync regs value incorrect 0x%llx.",
+                   run->s.regs.diag318);
 
        vcpu_regs_get(vm, VCPU_ID, &regs);
        compare_regs(&regs, &run->s.regs);
@@ -177,6 +187,7 @@ int main(int argc, char *argv[])
        run->kvm_valid_regs = TEST_SYNC_FIELDS;
        run->kvm_dirty_regs = 0;
        run->s.regs.gprs[11] = 0xDEADBEEF;
+       run->s.regs.diag318 = 0x4B1D;
        rv = _vcpu_run(vm, VCPU_ID);
        TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
        TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
@@ -186,6 +197,9 @@ int main(int argc, char *argv[])
        TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
                    "r11 sync regs value incorrect 0x%llx.",
                    run->s.regs.gprs[11]);
+       TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
+                   "diag318 sync regs value incorrect 0x%llx.",
+                   run->s.regs.diag318);
 
        kvm_vm_free(vm);
 
index 6f441dd..f127ed3 100644 (file)
@@ -121,8 +121,6 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
 
        vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
                                    MEM_REGION_GPA, MEM_REGION_SLOT,
                                    MEM_REGION_SIZE / getpagesize(), 0);
index 140e919..f40fd09 100644 (file)
@@ -81,7 +81,6 @@ int main(int argc, char *argv[])
 
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        run = vcpu_state(vm, VCPU_ID);
 
        while (1) {
index 2fc6b3a..6097a82 100644 (file)
@@ -85,7 +85,6 @@ int main(void)
        }
 
        vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        run = vcpu_state(vm, VCPU_ID);
 
        /* Test software BPs - int3 */
index 7579281..37b8a78 100644 (file)
@@ -92,8 +92,6 @@ int main(int argc, char *argv[])
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
        if (!nested_vmx_supported() ||
            !kvm_check_cap(KVM_CAP_NESTED_STATE) ||
            !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
index 745b708..88a595b 100644 (file)
@@ -46,19 +46,19 @@ static bool smt_possible(void)
 }
 
 static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
-                         bool evmcs_enabled)
+                         bool evmcs_expected)
 {
        int i;
        int nent = 9;
        u32 test_val;
 
-       if (evmcs_enabled)
+       if (evmcs_expected)
                nent += 1; /* 0x4000000A */
 
        TEST_ASSERT(hv_cpuid_entries->nent == nent,
                    "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
                    " with evmcs=%d (returned %d)",
-                   nent, evmcs_enabled, hv_cpuid_entries->nent);
+                   nent, evmcs_expected, hv_cpuid_entries->nent);
 
        for (i = 0; i < hv_cpuid_entries->nent; i++) {
                struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
@@ -68,7 +68,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
                            "function %x is our of supported range",
                            entry->function);
 
-               TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A),
+               TEST_ASSERT(evmcs_expected || (entry->function != 0x4000000A),
                            "0x4000000A leaf should not be reported");
 
                TEST_ASSERT(entry->index == 0,
@@ -87,7 +87,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
                        TEST_ASSERT(entry->eax == test_val,
                                    "Wrong max leaf report in 0x40000000.EAX: %x"
                                    " (evmcs=%d)",
-                                   entry->eax, evmcs_enabled
+                                   entry->eax, evmcs_expected
                                );
                        break;
                case 0x40000004:
@@ -110,20 +110,23 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 
 }
 
-void test_hv_cpuid_e2big(struct kvm_vm *vm)
+void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system)
 {
        static struct kvm_cpuid2 cpuid = {.nent = 0};
        int ret;
 
-       ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+       if (!system)
+               ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+       else
+               ret = _kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
 
        TEST_ASSERT(ret == -1 && errno == E2BIG,
-                   "KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
-                   " it should have: %d %d", ret, errno);
+                   "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+                   " it should have: %d %d", system ? "KVM" : "vCPU", ret, errno);
 }
 
 
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
+struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm, bool system)
 {
        int nent = 20; /* should be enough */
        static struct kvm_cpuid2 *cpuid;
@@ -137,7 +140,10 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
 
        cpuid->nent = nent;
 
-       vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+       if (!system)
+               vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+       else
+               kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
 
        return cpuid;
 }
@@ -146,45 +152,50 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
 int main(int argc, char *argv[])
 {
        struct kvm_vm *vm;
-       int rv, stage;
        struct kvm_cpuid2 *hv_cpuid_entries;
-       bool evmcs_enabled;
 
        /* Tell stdout not to buffer its content */
        setbuf(stdout, NULL);
 
-       rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID);
-       if (!rv) {
+       if (!kvm_check_cap(KVM_CAP_HYPERV_CPUID)) {
                print_skip("KVM_CAP_HYPERV_CPUID not supported");
                exit(KSFT_SKIP);
        }
 
-       for (stage = 0; stage < 3; stage++) {
-               evmcs_enabled = false;
+       vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-               vm = vm_create_default(VCPU_ID, 0, guest_code);
-               switch (stage) {
-               case 0:
-                       test_hv_cpuid_e2big(vm);
-                       continue;
-               case 1:
-                       break;
-               case 2:
-                       if (!nested_vmx_supported() ||
-                           !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-                               print_skip("Enlightened VMCS is unsupported");
-                               continue;
-                       }
-                       vcpu_enable_evmcs(vm, VCPU_ID);
-                       evmcs_enabled = true;
-                       break;
-               }
+       /* Test vCPU ioctl version */
+       test_hv_cpuid_e2big(vm, false);
+
+       hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false);
+       test_hv_cpuid(hv_cpuid_entries, false);
+       free(hv_cpuid_entries);
 
-               hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
-               test_hv_cpuid(hv_cpuid_entries, evmcs_enabled);
-               free(hv_cpuid_entries);
-               kvm_vm_free(vm);
+       if (!nested_vmx_supported() ||
+           !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+               print_skip("Enlightened VMCS is unsupported");
+               goto do_sys;
        }
+       vcpu_enable_evmcs(vm, VCPU_ID);
+       hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false);
+       test_hv_cpuid(hv_cpuid_entries, true);
+       free(hv_cpuid_entries);
+
+do_sys:
+       /* Test system ioctl version */
+       if (!kvm_check_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+               print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
+               goto out;
+       }
+
+       test_hv_cpuid_e2big(vm, true);
+
+       hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, true);
+       test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported());
+       free(hv_cpuid_entries);
+
+out:
+       kvm_vm_free(vm);
 
        return 0;
 }
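
Outside the selftest harness, the new system-scoped variant is driven the same
way; a minimal raw-ioctl sketch (a hedged illustration: kvm_fd is assumed to be
an open /dev/kvm fd, and the entry count is sized generously like the test's
"nent = 20"):

    struct kvm_cpuid2 *cpuid;
    int nent = 20;                               /* "should be enough", as above */

    cpuid = calloc(1, sizeof(*cpuid) + nent * sizeof(cpuid->entries[0]));
    cpuid->nent = nent;
    if (ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid) < 0)
            err(1, "KVM_GET_SUPPORTED_HV_CPUID"); /* E2BIG here means nent was too small */
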
index b10a274..732b244 100644 (file)
@@ -211,8 +211,8 @@ int main(void)
        struct kvm_vm *vm;
 
        if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
-               pr_info("will skip kvm paravirt restriction tests.\n");
-               return 0;
+               print_skip("KVM_CAP_ENFORCE_PV_FEATURE_CPUID not supported");
+               exit(KSFT_SKIP);
        }
 
        vm = vm_create_default(VCPU_ID, 0, guest_main);
index 9f76561..318be0b 100644 (file)
 
 #define VCPU_ID                  5
 
+static void test_cr4_feature_bit(struct kvm_vm *vm, struct kvm_sregs *orig,
+                                uint64_t feature_bit)
+{
+       struct kvm_sregs sregs;
+       int rc;
+
+       /* Skip the sub-test if the feature is supported. */
+       if (orig->cr4 & feature_bit)
+               return;
+
+       memcpy(&sregs, orig, sizeof(sregs));
+       sregs.cr4 |= feature_bit;
+
+       rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+       TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit);
+
+       /* Sanity check that KVM didn't change anything. */
+       vcpu_sregs_get(vm, VCPU_ID, &sregs);
+       TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs");
+}
+
+static uint64_t calc_cr4_feature_bits(struct kvm_vm *vm)
+{
+       struct kvm_cpuid_entry2 *cpuid_1, *cpuid_7;
+       uint64_t cr4;
+
+       cpuid_1 = kvm_get_supported_cpuid_entry(1);
+       cpuid_7 = kvm_get_supported_cpuid_entry(7);
+
+       cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
+             X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
+             X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
+       if (cpuid_7->ecx & CPUID_UMIP)
+               cr4 |= X86_CR4_UMIP;
+       if (cpuid_7->ecx & CPUID_LA57)
+               cr4 |= X86_CR4_LA57;
+       if (cpuid_1->ecx & CPUID_VMX)
+               cr4 |= X86_CR4_VMXE;
+       if (cpuid_1->ecx & CPUID_SMX)
+               cr4 |= X86_CR4_SMXE;
+       if (cpuid_7->ebx & CPUID_FSGSBASE)
+               cr4 |= X86_CR4_FSGSBASE;
+       if (cpuid_1->ecx & CPUID_PCID)
+               cr4 |= X86_CR4_PCIDE;
+       if (cpuid_1->ecx & CPUID_XSAVE)
+               cr4 |= X86_CR4_OSXSAVE;
+       if (cpuid_7->ebx & CPUID_SMEP)
+               cr4 |= X86_CR4_SMEP;
+       if (cpuid_7->ebx & CPUID_SMAP)
+               cr4 |= X86_CR4_SMAP;
+       if (cpuid_7->ecx & CPUID_PKU)
+               cr4 |= X86_CR4_PKE;
+
+       return cr4;
+}
+
 int main(int argc, char *argv[])
 {
        struct kvm_sregs sregs;
        struct kvm_vm *vm;
+       uint64_t cr4;
        int rc;
 
        /* Tell stdout not to buffer its content */
        setbuf(stdout, NULL);
 
-       /* Create VM */
+       /*
+        * Create a dummy VM specifically to avoid doing KVM_SET_CPUID2, and
+        * use it to verify that all supported CR4 bits can be set prior to
+        * defining the vCPU model.
+        */
+       vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+       vm_vcpu_add(vm, VCPU_ID);
+
+       vcpu_sregs_get(vm, VCPU_ID, &sregs);
+
+       sregs.cr4 |= calc_cr4_feature_bits(vm);
+       cr4 = sregs.cr4;
+
+       rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+       TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+
+       vcpu_sregs_get(vm, VCPU_ID, &sregs);
+       TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
+                   sregs.cr4, cr4);
+
+       /* Verify all unsupported features are rejected by KVM. */
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_UMIP);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_LA57);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_VMXE);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_SMXE);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_FSGSBASE);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_PCIDE);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_OSXSAVE);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_SMEP);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_SMAP);
+       test_cr4_feature_bit(vm, &sregs, X86_CR4_PKE);
+       kvm_vm_free(vm);
+
+       /* Create a "real" VM and verify APIC_BASE can be set. */
        vm = vm_create_default(VCPU_ID, 0, NULL);
 
        vcpu_sregs_get(vm, VCPU_ID, &sregs);
index ae39a22..613c42c 100644 (file)
@@ -102,8 +102,6 @@ int main(int argc, char *argv[])
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
        run = vcpu_state(vm, VCPU_ID);
 
        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
index f6c8b90..32854c1 100644 (file)
@@ -165,7 +165,6 @@ int main(int argc, char *argv[])
 
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        run = vcpu_state(vm, VCPU_ID);
 
        vcpu_regs_get(vm, VCPU_ID, &regs1);
index 0e1adb4..be2ca15 100644 (file)
@@ -44,7 +44,6 @@ int main(int argc, char *argv[])
        nested_svm_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        vcpu_alloc_svm(vm, &svm_gva);
        vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
index f8e7611..e357d8e 100644 (file)
@@ -107,7 +107,6 @@ int main(void)
        uint64_t val;
 
        vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        val = 0;
        ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
deleted file mode 100644 (file)
index cbe1b08..0000000
+++ /dev/null
@@ -1,248 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tests for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER
- *
- * Copyright (C) 2020, Amazon Inc.
- *
- * This is a functional test to verify that we can deflect MSR events
- * into user space.
- */
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define VCPU_ID                  5
-
-static u32 msr_reads, msr_writes;
-
-static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_deadbeef[1] = { 0x1 };
-
-static void deny_msr(uint8_t *bitmap, u32 msr)
-{
-       u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
-
-       bitmap[idx / 8] &= ~(1 << (idx % 8));
-}
-
-static void prepare_bitmaps(void)
-{
-       memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
-       memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
-       memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
-       memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
-       memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
-
-       deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
-       deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
-       deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
-}
-
-struct kvm_msr_filter filter = {
-       .flags = KVM_MSR_FILTER_DEFAULT_DENY,
-       .ranges = {
-               {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .base = 0x00000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_00000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE,
-                       .base = 0x00000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_00000000_write,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
-                       .base = 0x40000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_40000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_READ,
-                       .base = 0xc0000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_c0000000_read,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE,
-                       .base = 0xc0000000,
-                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-                       .bitmap = bitmap_c0000000,
-               }, {
-                       .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
-                       .base = 0xdeadbeef,
-                       .nmsrs = 1,
-                       .bitmap = bitmap_deadbeef,
-               },
-       },
-};
-
-struct kvm_msr_filter no_filter = {
-       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-};
-
-static void guest_msr_calls(bool trapped)
-{
-       /* This goes into the in-kernel emulation */
-       wrmsr(MSR_SYSCALL_MASK, 0);
-
-       if (trapped) {
-               /* This goes into user space emulation */
-               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
-               GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
-       } else {
-               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
-               GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
-       }
-
-       /* If trapped == true, this goes into user space emulation */
-       wrmsr(MSR_IA32_POWER_CTL, 0x1234);
-
-       /* This goes into the in-kernel emulation */
-       rdmsr(MSR_IA32_POWER_CTL);
-
-       /* Invalid MSR, should always be handled by user space exit */
-       GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
-       wrmsr(0xdeadbeef, 0x1234);
-}
-
-static void guest_code(void)
-{
-       guest_msr_calls(true);
-
-       /*
-        * Disable msr filtering, so that the kernel
-        * handles everything in the next round
-        */
-       GUEST_SYNC(0);
-
-       guest_msr_calls(false);
-
-       GUEST_DONE();
-}
-
-static int handle_ucall(struct kvm_vm *vm)
-{
-       struct ucall uc;
-
-       switch (get_ucall(vm, VCPU_ID, &uc)) {
-       case UCALL_ABORT:
-               TEST_FAIL("Guest assertion not met");
-               break;
-       case UCALL_SYNC:
-               vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter);
-               break;
-       case UCALL_DONE:
-               return 1;
-       default:
-               TEST_FAIL("Unknown ucall %lu", uc.cmd);
-       }
-
-       return 0;
-}
-
-static void handle_rdmsr(struct kvm_run *run)
-{
-       run->msr.data = run->msr.index;
-       msr_reads++;
-
-       if (run->msr.index == MSR_SYSCALL_MASK ||
-           run->msr.index == MSR_GS_BASE) {
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
-                           "MSR read trap w/o access fault");
-       }
-
-       if (run->msr.index == 0xdeadbeef) {
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
-                           "MSR deadbeef read trap w/o inval fault");
-       }
-}
-
-static void handle_wrmsr(struct kvm_run *run)
-{
-       /* ignore */
-       msr_writes++;
-
-       if (run->msr.index == MSR_IA32_POWER_CTL) {
-               TEST_ASSERT(run->msr.data == 0x1234,
-                           "MSR data for MSR_IA32_POWER_CTL incorrect");
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
-                           "MSR_IA32_POWER_CTL trap w/o access fault");
-       }
-
-       if (run->msr.index == 0xdeadbeef) {
-               TEST_ASSERT(run->msr.data == 0x1234,
-                           "MSR data for deadbeef incorrect");
-               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
-                           "deadbeef trap w/o inval fault");
-       }
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_enable_cap cap = {
-               .cap = KVM_CAP_X86_USER_SPACE_MSR,
-               .args[0] = KVM_MSR_EXIT_REASON_INVAL |
-                          KVM_MSR_EXIT_REASON_UNKNOWN |
-                          KVM_MSR_EXIT_REASON_FILTER,
-       };
-       struct kvm_vm *vm;
-       struct kvm_run *run;
-       int rc;
-
-       /* Tell stdout not to buffer its content */
-       setbuf(stdout, NULL);
-
-       /* Create VM */
-       vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-       run = vcpu_state(vm, VCPU_ID);
-
-       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
-       vm_enable_cap(vm, &cap);
-
-       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-       prepare_bitmaps();
-       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter);
-
-       while (1) {
-               rc = _vcpu_run(vm, VCPU_ID);
-
-               TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
-
-               switch (run->exit_reason) {
-               case KVM_EXIT_X86_RDMSR:
-                       handle_rdmsr(run);
-                       break;
-               case KVM_EXIT_X86_WRMSR:
-                       handle_wrmsr(run);
-                       break;
-               case KVM_EXIT_IO:
-                       if (handle_ucall(vm))
-                               goto done;
-                       break;
-               }
-
-       }
-
-done:
-       TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
-       TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
-
-       kvm_vm_free(vm);
-
-       return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
new file mode 100644 (file)
index 0000000..72c0d07
--- /dev/null
@@ -0,0 +1,770 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for exiting into userspace on registered MSRs
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
+#define KVM_FEP_LENGTH 5
+static int fep_available = 1;
+
+#define VCPU_ID              1
+#define MSR_NON_EXISTENT 0x474f4f00
+
+static u64 deny_bits = 0;
+struct kvm_msr_filter filter_allow = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test an MSR the kernel knows about. */
+                       .base = MSR_IA32_XSS,
+                       .bitmap = (uint8_t*)&deny_bits,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test an MSR the kernel doesn't know about. */
+                       .base = MSR_IA32_FLUSH_CMD,
+                       .bitmap = (uint8_t*)&deny_bits,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ |
+                                KVM_MSR_FILTER_WRITE,
+                       .nmsrs = 1,
+                       /* Test a fabricated MSR that no one knows about. */
+                       .base = MSR_NON_EXISTENT,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+struct kvm_msr_filter filter_fs = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .nmsrs = 1,
+                       .base = MSR_FS_BASE,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+struct kvm_msr_filter filter_gs = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .nmsrs = 1,
+                       .base = MSR_GS_BASE,
+                       .bitmap = (uint8_t*)&deny_bits,
+               },
+       },
+};
+
+static uint64_t msr_non_existent_data;
+static int guest_exception_count;
+static u32 msr_reads, msr_writes;
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };
+
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+       u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+
+       bitmap[idx / 8] &= ~(1 << (idx % 8));
+}
+
+static void prepare_bitmaps(void)
+{
+       memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+       memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+       memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+       memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+       memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+
+       deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+       deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+       deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+}
+
+struct kvm_msr_filter filter_deny = {
+       .flags = KVM_MSR_FILTER_DEFAULT_DENY,
+       .ranges = {
+               {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .base = 0x00000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_00000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE,
+                       .base = 0x00000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_00000000_write,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+                       .base = 0x40000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_40000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_READ,
+                       .base = 0xc0000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_c0000000_read,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE,
+                       .base = 0xc0000000,
+                       .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+                       .bitmap = bitmap_c0000000,
+               }, {
+                       .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+                       .base = 0xdeadbeef,
+                       .nmsrs = 1,
+                       .bitmap = bitmap_deadbeef,
+               },
+       },
+};
+
+struct kvm_msr_filter no_filter_deny = {
+       .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
+
+/*
+ * Note: Force test_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_rdmsr(uint32_t msr)
+{
+       uint32_t a, d;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
+                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+       return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+{
+       uint32_t a = value;
+       uint32_t d = value >> 32;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
+                       "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char rdmsr_start, rdmsr_end;
+extern char wrmsr_start, wrmsr_end;
+
+/*
+ * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
+ * em_rdmsr_start and em_rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_em_rdmsr(uint32_t msr)
+{
+       uint32_t a, d;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
+                       "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+       return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
+ * em_wrmsr_start and em_wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+{
+       uint32_t a = value;
+       uint32_t d = value >> 32;
+
+       guest_exception_count = 0;
+
+       __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
+                       "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char em_rdmsr_start, em_rdmsr_end;
+extern char em_wrmsr_start, em_wrmsr_end;
+
+static void guest_code_filter_allow(void)
+{
+       uint64_t data;
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
+        *
+        * A GP is thrown if anything other than 0 is written to
+        * MSR_IA32_XSS.
+        */
+       data = test_rdmsr(MSR_IA32_XSS);
+       GUEST_ASSERT(data == 0);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       test_wrmsr(MSR_IA32_XSS, 0);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       test_wrmsr(MSR_IA32_XSS, 1);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
+        *
+        * A GP is thrown if MSR_IA32_FLUSH_CMD is read
+        * from or if a value other than 1 is written to it.
+        */
+       test_rdmsr(MSR_IA32_FLUSH_CMD);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+       GUEST_ASSERT(guest_exception_count == 1);
+
+       test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       /*
+        * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
+        *
+        * Test that a fabricated MSR can pass through the kernel
+        * and be handled in userspace.
+        */
+       test_wrmsr(MSR_NON_EXISTENT, 2);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       data = test_rdmsr(MSR_NON_EXISTENT);
+       GUEST_ASSERT(data == 2);
+       GUEST_ASSERT(guest_exception_count == 0);
+
+       /*
+        * Test to see if the instruction emulator is available (i.e., the module
+        * parameter 'kvm.force_emulation_prefix=1' is set).  This instruction
+        * will #UD if it isn't available.
+        */
+       __asm__ __volatile__(KVM_FEP "nop");
+
+       if (fep_available) {
+               /* Let userspace know we aren't done. */
+               GUEST_SYNC(0);
+
+               /*
+                * Now run the same tests with the instruction emulator.
+                */
+               data = test_em_rdmsr(MSR_IA32_XSS);
+               GUEST_ASSERT(data == 0);
+               GUEST_ASSERT(guest_exception_count == 0);
+               test_em_wrmsr(MSR_IA32_XSS, 0);
+               GUEST_ASSERT(guest_exception_count == 0);
+               test_em_wrmsr(MSR_IA32_XSS, 1);
+               GUEST_ASSERT(guest_exception_count == 1);
+
+               test_em_rdmsr(MSR_IA32_FLUSH_CMD);
+               GUEST_ASSERT(guest_exception_count == 1);
+               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+               GUEST_ASSERT(guest_exception_count == 1);
+               test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+               GUEST_ASSERT(guest_exception_count == 0);
+
+               test_em_wrmsr(MSR_NON_EXISTENT, 2);
+               GUEST_ASSERT(guest_exception_count == 0);
+               data = test_em_rdmsr(MSR_NON_EXISTENT);
+               GUEST_ASSERT(data == 2);
+               GUEST_ASSERT(guest_exception_count == 0);
+       }
+
+       GUEST_DONE();
+}
+
+static void guest_msr_calls(bool trapped)
+{
+       /* This goes into the in-kernel emulation */
+       wrmsr(MSR_SYSCALL_MASK, 0);
+
+       if (trapped) {
+               /* This goes into user space emulation */
+               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+               GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+       } else {
+               GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+               GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+       }
+
+       /* If trapped == true, this goes into user space emulation */
+       wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+       /* This goes into the in-kernel emulation */
+       rdmsr(MSR_IA32_POWER_CTL);
+
+       /* Invalid MSR, should always be handled by user space exit */
+       GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+       wrmsr(0xdeadbeef, 0x1234);
+}
+
+static void guest_code_filter_deny(void)
+{
+       guest_msr_calls(true);
+
+       /*
+        * Disable MSR filtering so that the kernel
+        * handles everything in the next round.
+        */
+       GUEST_SYNC(0);
+
+       guest_msr_calls(false);
+
+       GUEST_DONE();
+}
+
+static void guest_code_permission_bitmap(void)
+{
+       uint64_t data;
+
+       data = test_rdmsr(MSR_FS_BASE);
+       GUEST_ASSERT(data == MSR_FS_BASE);
+       data = test_rdmsr(MSR_GS_BASE);
+       GUEST_ASSERT(data != MSR_GS_BASE);
+
+       /* Let userspace know to switch the filter */
+       GUEST_SYNC(0);
+
+       data = test_rdmsr(MSR_FS_BASE);
+       GUEST_ASSERT(data != MSR_FS_BASE);
+       data = test_rdmsr(MSR_GS_BASE);
+       GUEST_ASSERT(data == MSR_GS_BASE);
+
+       GUEST_DONE();
+}
+
+static void __guest_gp_handler(struct ex_regs *regs,
+                              char *r_start, char *r_end,
+                              char *w_start, char *w_end)
+{
+       if (regs->rip == (uintptr_t)r_start) {
+               regs->rip = (uintptr_t)r_end;
+               regs->rax = 0;
+               regs->rdx = 0;
+       } else if (regs->rip == (uintptr_t)w_start) {
+               regs->rip = (uintptr_t)w_end;
+       } else {
+               GUEST_ASSERT(!"RIP is at an unknown location!");
+       }
+
+       ++guest_exception_count;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+       __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
+                          &wrmsr_start, &wrmsr_end);
+}
+
+static void guest_fep_gp_handler(struct ex_regs *regs)
+{
+       __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
+                          &em_wrmsr_start, &em_wrmsr_end);
+}
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       fep_available = 0;
+       regs->rip += KVM_FEP_LENGTH;
+}
+
+static void run_guest(struct kvm_vm *vm)
+{
+       int rc;
+
+       rc = _vcpu_run(vm, VCPU_ID);
+       TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+}
+
+static void check_for_guest_assert(struct kvm_vm *vm)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+       struct ucall uc;
+
+       if (run->exit_reason == KVM_EXIT_IO &&
+           get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) {
+               TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+                         __FILE__, uc.args[1]);
+       }
+}
+
+static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+       check_for_guest_assert(vm);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_RDMSR,
+                   "Unexpected exit reason: %u (%s),\n",
+                   run->exit_reason,
+                   exit_reason_str(run->exit_reason));
+       TEST_ASSERT(run->msr.index == msr_index,
+                       "Unexpected msr (0x%04x), expected 0x%04x",
+                       run->msr.index, msr_index);
+
+       switch (run->msr.index) {
+       case MSR_IA32_XSS:
+               run->msr.data = 0;
+               break;
+       case MSR_IA32_FLUSH_CMD:
+               run->msr.error = 1;
+               break;
+       case MSR_NON_EXISTENT:
+               run->msr.data = msr_non_existent_data;
+               break;
+       case MSR_FS_BASE:
+               run->msr.data = MSR_FS_BASE;
+               break;
+       case MSR_GS_BASE:
+               run->msr.data = MSR_GS_BASE;
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+       }
+}
+
+static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+       check_for_guest_assert(vm);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_WRMSR,
+                   "Unexpected exit reason: %u (%s),\n",
+                   run->exit_reason,
+                   exit_reason_str(run->exit_reason));
+       TEST_ASSERT(run->msr.index == msr_index,
+                       "Unexpected msr (0x%04x), expected 0x%04x",
+                       run->msr.index, msr_index);
+
+       switch (run->msr.index) {
+       case MSR_IA32_XSS:
+               if (run->msr.data != 0)
+                       run->msr.error = 1;
+               break;
+       case MSR_IA32_FLUSH_CMD:
+               if (run->msr.data != 1)
+                       run->msr.error = 1;
+               break;
+       case MSR_NON_EXISTENT:
+               msr_non_existent_data = run->msr.data;
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+       }
+}
+
+static void process_ucall_done(struct kvm_vm *vm)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+       struct ucall uc;
+
+       check_for_guest_assert(vm);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                   "Unexpected exit reason: %u (%s)",
+                   run->exit_reason,
+                   exit_reason_str(run->exit_reason));
+
+       TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE,
+                   "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+                   uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vm *vm)
+{
+       struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+       struct ucall uc = {};
+
+       check_for_guest_assert(vm);
+
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                   "Unexpected exit reason: %u (%s)",
+                   run->exit_reason,
+                   exit_reason_str(run->exit_reason));
+
+       switch (get_ucall(vm, VCPU_ID, &uc)) {
+       case UCALL_SYNC:
+               break;
+       case UCALL_ABORT:
+               check_for_guest_assert(vm);
+               break;
+       case UCALL_DONE:
+               process_ucall_done(vm);
+               break;
+       default:
+               TEST_ASSERT(false, "Unexpected ucall");
+       }
+
+       return uc.cmd;
+}
+
+static void run_guest_then_process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+       run_guest(vm);
+       process_rdmsr(vm, msr_index);
+}
+
+static void run_guest_then_process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+       run_guest(vm);
+       process_wrmsr(vm, msr_index);
+}
+
+static uint64_t run_guest_then_process_ucall(struct kvm_vm *vm)
+{
+       run_guest(vm);
+       return process_ucall(vm);
+}
+
+static void run_guest_then_process_ucall_done(struct kvm_vm *vm)
+{
+       run_guest(vm);
+       process_ucall_done(vm);
+}
+
+static void test_msr_filter_allow(void)
+{
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_X86_USER_SPACE_MSR,
+               .args[0] = KVM_MSR_EXIT_REASON_FILTER,
+       };
+       struct kvm_vm *vm;
+       int rc;
+
+       /* Create VM */
+       vm = vm_create_default(VCPU_ID, 0, guest_code_filter_allow);
+       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, &cap);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+       vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+
+       /* Process guest code userspace exits. */
+       run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
+       run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+       run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+
+       run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD);
+       run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+       run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+
+       run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
+       run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
+
+       vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+       run_guest(vm);
+       vm_handle_exception(vm, UD_VECTOR, NULL);
+
+       if (process_ucall(vm) != UCALL_DONE) {
+               vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler);
+
+               /* Process emulated rdmsr and wrmsr instructions. */
+               run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
+               run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+               run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+
+               run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD);
+               run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+               run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+
+               run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
+               run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
+
+               /* Confirm the guest completed without issues. */
+               run_guest_then_process_ucall_done(vm);
+       } else {
+               printf("To run the emulated instruction tests, set the module parameter 'kvm.force_emulation_prefix=1'\n");
+       }
+
+       kvm_vm_free(vm);
+}
+
+static int handle_ucall(struct kvm_vm *vm)
+{
+       struct ucall uc;
+
+       switch (get_ucall(vm, VCPU_ID, &uc)) {
+       case UCALL_ABORT:
+               TEST_FAIL("Guest assertion not met");
+               break;
+       case UCALL_SYNC:
+               vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+               break;
+       case UCALL_DONE:
+               return 1;
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+       }
+
+       return 0;
+}
+
+static void handle_rdmsr(struct kvm_run *run)
+{
+       run->msr.data = run->msr.index;
+       msr_reads++;
+
+       if (run->msr.index == MSR_SYSCALL_MASK ||
+           run->msr.index == MSR_GS_BASE) {
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+                           "MSR read trap w/o access fault");
+       }
+
+       if (run->msr.index == 0xdeadbeef) {
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+                           "MSR deadbeef read trap w/o inval fault");
+       }
+}
+
+static void handle_wrmsr(struct kvm_run *run)
+{
+       /* ignore */
+       msr_writes++;
+
+       if (run->msr.index == MSR_IA32_POWER_CTL) {
+               TEST_ASSERT(run->msr.data == 0x1234,
+                           "MSR data for MSR_IA32_POWER_CTL incorrect");
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+                           "MSR_IA32_POWER_CTL trap w/o access fault");
+       }
+
+       if (run->msr.index == 0xdeadbeef) {
+               TEST_ASSERT(run->msr.data == 0x1234,
+                           "MSR data for deadbeef incorrect");
+               TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+                           "deadbeef trap w/o inval fault");
+       }
+}
+
+static void test_msr_filter_deny(void)
+{
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_X86_USER_SPACE_MSR,
+               .args[0] = KVM_MSR_EXIT_REASON_INVAL |
+                          KVM_MSR_EXIT_REASON_UNKNOWN |
+                          KVM_MSR_EXIT_REASON_FILTER,
+       };
+       struct kvm_vm *vm;
+       struct kvm_run *run;
+       int rc;
+
+       /* Create VM */
+       vm = vm_create_default(VCPU_ID, 0, guest_code_filter_deny);
+       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+       run = vcpu_state(vm, VCPU_ID);
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, &cap);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       prepare_bitmaps();
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
+
+       while (1) {
+               rc = _vcpu_run(vm, VCPU_ID);
+
+               TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+
+               switch (run->exit_reason) {
+               case KVM_EXIT_X86_RDMSR:
+                       handle_rdmsr(run);
+                       break;
+               case KVM_EXIT_X86_WRMSR:
+                       handle_wrmsr(run);
+                       break;
+               case KVM_EXIT_IO:
+                       if (handle_ucall(vm))
+                               goto done;
+                       break;
+               }
+
+       }
+
+done:
+       TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+       TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+
+       kvm_vm_free(vm);
+}
+
+static void test_msr_permission_bitmap(void)
+{
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_X86_USER_SPACE_MSR,
+               .args[0] = KVM_MSR_EXIT_REASON_FILTER,
+       };
+       struct kvm_vm *vm;
+       int rc;
+
+       /* Create VM */
+       vm = vm_create_default(VCPU_ID, 0, guest_code_permission_bitmap);
+       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+       rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+       TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+       vm_enable_cap(vm, &cap);
+
+       rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+       TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
+       run_guest_then_process_rdmsr(vm, MSR_FS_BASE);
+       TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC,
+                   "Expected ucall state to be UCALL_SYNC.");
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
+       run_guest_then_process_rdmsr(vm, MSR_GS_BASE);
+       run_guest_then_process_ucall_done(vm);
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       /* Tell stdout not to buffer its content */
+       setbuf(stdout, NULL);
+
+       test_msr_filter_allow();
+
+       test_msr_filter_deny();
+
+       test_msr_permission_bitmap();
+
+       return 0;
+}
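
Reduced to its core, the userspace contract this test drives looks like the
sketch below (msr_read_ok(), msr_write_ok() and msr_value_for() are
hypothetical policy hooks, not part of the test):

    switch (run->exit_reason) {
    case KVM_EXIT_X86_RDMSR:
            if (msr_read_ok(run->msr.index))
                    run->msr.data = msr_value_for(run->msr.index);
            else
                    run->msr.error = 1;          /* the guest takes a #GP instead */
            break;
    case KVM_EXIT_X86_WRMSR:
            if (!msr_write_ok(run->msr.index, run->msr.data))
                    run->msr.error = 1;
            break;
    }
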
index 1f65342..d14888b 100644 (file)
@@ -87,7 +87,6 @@ int main(int argc, char *argv[])
        nested_vmx_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        kvm_get_cpu_address_width(&paddr_width, &vaddr_width);
        high_gpa = (1ul << paddr_width) - getpagesize();
index fe40ade..2835a17 100644 (file)
@@ -57,7 +57,6 @@ int main(int argc, char *argv[])
        nested_vmx_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        /* Allocate VMX pages and shared descriptors (vmx_pages). */
        vcpu_alloc_vmx(vm, &vmx_pages_gva);
index e894a63..537de10 100644 (file)
@@ -82,7 +82,6 @@ int main(int argc, char *argv[])
 
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
        vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
        run = vcpu_state(vm, VCPU_ID);
index a7737af..a07480a 100644 (file)
@@ -169,20 +169,19 @@ int main(int argc, char *argv[])
         */
        nested_vmx_check_supported();
 
+       if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+               print_skip("KVM_CAP_NESTED_STATE not supported");
+               exit(KSFT_SKIP);
+       }
+
        /* Create VM */
        vm = vm_create_default(VCPU_ID, 0, guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        run = vcpu_state(vm, VCPU_ID);
 
        vcpu_regs_get(vm, VCPU_ID, &regs1);
 
-       if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-               vcpu_alloc_vmx(vm, &vmx_pages_gva);
-               vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
-       } else {
-               pr_info("will skip vmx preemption timer checks\n");
-               goto done;
-       }
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 
        for (stage = 1;; stage++) {
                _vcpu_run(vm, VCPU_ID);
index d59f3eb..5827b9b 100644 (file)
@@ -244,6 +244,22 @@ void test_vmx_nested_state(struct kvm_vm *vm)
        free(state);
 }
 
+void disable_vmx(struct kvm_vm *vm)
+{
+       struct kvm_cpuid2 *cpuid = kvm_get_supported_cpuid();
+       int i;
+
+       for (i = 0; i < cpuid->nent; ++i)
+               if (cpuid->entries[i].function == 1 &&
+                   cpuid->entries[i].index == 0)
+                       break;
+       TEST_ASSERT(i != cpuid->nent, "CPUID function 1 not found");
+
+       cpuid->entries[i].ecx &= ~CPUID_VMX;
+       vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+       cpuid->entries[i].ecx |= CPUID_VMX;
+}
+
 int main(int argc, char *argv[])
 {
        struct kvm_vm *vm;
@@ -264,6 +280,11 @@ int main(int argc, char *argv[])
 
        vm = vm_create_default(VCPU_ID, 0, 0);
 
+       /*
+        * First run tests with VMX disabled to check error handling.
+        */
+       disable_vmx(vm);
+
        /* Passing a NULL kvm_nested_state causes a EFAULT. */
        test_nested_state_expect_efault(vm, NULL);
 
index fbe8417..7e33a35 100644 (file)
@@ -132,7 +132,6 @@ int main(int argc, char *argv[])
        nested_vmx_check_supported();
 
        vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        /* Allocate VMX pages and shared descriptors (vmx_pages). */
        vcpu_alloc_vmx(vm, &vmx_pages_gva);
index 58bb7e9..834066d 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 
 from subprocess import PIPE, Popen
index 995f66c..35d5d94 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 
 """
 tdc_batch.py - a script to generate TC batch file
index 5e72379..48e1f17 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 """
 tdc_multibatch.py - a thin wrapper over tdc_batch.py to generate multiple batch
index 04d563f..468435e 100644 (file)
 # define mb() abort()
 # define dma_rmb() abort()
 # define dma_wmb() abort()
+#elif defined(__aarch64__)
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() dmb(ishld)
+#define virt_wmb() dmb(ishst)
+#define virt_store_mb(var, value)  do { WRITE_ONCE(var, value); dmb(ish); } while (0)
+/* Weak barriers should be used; if a strong barrier is reached, it's a bug. */
+# define mb() abort()
+# define dma_rmb() abort()
+# define dma_wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
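
The virt_* barriers added above pair in the usual publish/consume pattern; an
illustrative sketch (the field names are made up for the example):

    /* producer */
    desc->addr = buf;
    desc->len  = len;
    virt_wmb();                                  /* make the payload visible first */
    WRITE_ONCE(desc->ready, 1);

    /* consumer */
    if (READ_ONCE(desc->ready)) {
            virt_rmb();                          /* flag seen => payload reads are safe */
            process(desc->addr, desc->len);
    }
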
index b14c2c3..813baf1 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef BUG_H
 #define BUG_H
 
+#include <asm/bug.h>
+
 #define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
 
 #define BUILD_BUG_ON(x)
index 315e85c..0b49354 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/compiler.h>
 #include <linux/types.h>
+#include <linux/overflow.h>
 #include <linux/list.h>
 #include <linux/printk.h>
 #include <linux/bug.h>
@@ -117,6 +118,16 @@ static inline void free_page(unsigned long addr)
 #  define unlikely(x)  (__builtin_expect(!!(x), 0))
 # endif
 
+static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t gfp)
+{
+       size_t bytes;
+
+       if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
+               return NULL;
+
+       return krealloc(p, bytes, gfp);
+}
+
 #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #ifdef DEBUG
 #define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
@@ -126,8 +137,6 @@ static inline void free_page(unsigned long addr)
 #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 
-#define WARN_ON_ONCE(cond) (unlikely(cond) ? fprintf (stderr, "WARNING\n") : 0)
-
 #define min(x, y) ({                           \
        typeof(x) _min1 = (x);                  \
        typeof(y) _min2 = (y);                  \
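
Callers use the new shim exactly like the kernel helper it mirrors; a minimal
sketch ('elems' and 'nr' are illustrative):

    struct elem *tmp = krealloc_array(elems, nr * 2, sizeof(*elems), GFP_KERNEL);

    if (!tmp)
            return -ENOMEM;                      /* 'elems' is untouched and still owned */
    elems = tmp;
    nr *= 2;
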
index e2c197f..62bd908 100644 (file)
@@ -111,7 +111,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
 {
        struct page *page;
 
-       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+       page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        if (!page)
                return -ENOMEM;
 
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
new file mode 100644 (file)
index 0000000..9d01299
--- /dev/null
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM dirty ring implementation
+ *
+ * Copyright 2019 Red Hat, Inc.
+ */
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/vmalloc.h>
+#include <linux/kvm_dirty_ring.h>
+#include <trace/events/kvm.h>
+
+int __weak kvm_cpu_dirty_log_size(void)
+{
+       return 0;
+}
+
+u32 kvm_dirty_ring_get_rsvd_entries(void)
+{
+       return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size();
+}
+
+static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
+{
+       return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
+}
+
+bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+{
+       return kvm_dirty_ring_used(ring) >= ring->soft_limit;
+}
+
+static bool kvm_dirty_ring_full(struct kvm_dirty_ring *ring)
+{
+       return kvm_dirty_ring_used(ring) >= ring->size;
+}
+
+struct kvm_dirty_ring *kvm_dirty_ring_get(struct kvm *kvm)
+{
+       struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
+
+       WARN_ON_ONCE(vcpu->kvm != kvm);
+
+       return &vcpu->dirty_ring;
+}
+
+static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
+{
+       struct kvm_memory_slot *memslot;
+       int as_id, id;
+
+       as_id = slot >> 16;
+       id = (u16)slot;
+
+       if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
+               return;
+
+       memslot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
+
+       if (!memslot || (offset + __fls(mask)) >= memslot->npages)
+               return;
+
+       spin_lock(&kvm->mmu_lock);
+       kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
+       spin_unlock(&kvm->mmu_lock);
+}
+
+int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size)
+{
+       ring->dirty_gfns = vmalloc(size);
+       if (!ring->dirty_gfns)
+               return -ENOMEM;
+       memset(ring->dirty_gfns, 0, size);
+
+       ring->size = size / sizeof(struct kvm_dirty_gfn);
+       ring->soft_limit = ring->size - kvm_dirty_ring_get_rsvd_entries();
+       ring->dirty_index = 0;
+       ring->reset_index = 0;
+       ring->index = index;
+
+       return 0;
+}
+
+static inline void kvm_dirty_gfn_set_invalid(struct kvm_dirty_gfn *gfn)
+{
+       gfn->flags = 0;
+}
+
+static inline void kvm_dirty_gfn_set_dirtied(struct kvm_dirty_gfn *gfn)
+{
+       gfn->flags = KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline bool kvm_dirty_gfn_invalid(struct kvm_dirty_gfn *gfn)
+{
+       return gfn->flags == 0;
+}
+
+static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
+{
+       return gfn->flags & KVM_DIRTY_GFN_F_RESET;
+}
+
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
+{
+       u32 cur_slot, next_slot;
+       u64 cur_offset, next_offset;
+       unsigned long mask;
+       int count = 0;
+       struct kvm_dirty_gfn *entry;
+       bool first_round = true;
+
+       /* This is only needed to make compilers happy */
+       cur_slot = cur_offset = mask = 0;
+
+       while (true) {
+               entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];
+
+               if (!kvm_dirty_gfn_harvested(entry))
+                       break;
+
+               next_slot = READ_ONCE(entry->slot);
+               next_offset = READ_ONCE(entry->offset);
+
+               /* Update the flags to reflect that this GFN is reset */
+               kvm_dirty_gfn_set_invalid(entry);
+
+               ring->reset_index++;
+               count++;
+               /*
+                * Try to coalesce the reset operations when the guest is
+                * scanning pages in the same slot.
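+                *
+                * For example: with cur_offset == 16 and mask == 0x1, a
+                * forward visit of offset 20 gives delta == 4, so mask
+                * becomes 0x11 (offsets 16 and 20).  A later visit of
+                * offset 14 gives delta == -2; the rebase below is legal
+                * because the top two bits of mask are clear, and yields
+                * cur_offset == 14, mask == (0x11 << 2) | 1 == 0x45.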
+                */
+               if (!first_round && next_slot == cur_slot) {
+                       s64 delta = next_offset - cur_offset;
+
+                       if (delta >= 0 && delta < BITS_PER_LONG) {
+                               mask |= 1ull << delta;
+                               continue;
+                       }
+
+                       /* Backwards visit, careful about overflows!  */
+                       if (delta > -BITS_PER_LONG && delta < 0 &&
+                           (mask << -delta >> -delta) == mask) {
+                               cur_offset = next_offset;
+                               mask = (mask << -delta) | 1;
+                               continue;
+                       }
+               }
+               kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+               cur_slot = next_slot;
+               cur_offset = next_offset;
+               mask = 1;
+               first_round = false;
+       }
+
+       kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+
+       trace_kvm_dirty_ring_reset(ring);
+
+       return count;
+}
+
+void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
+{
+       struct kvm_dirty_gfn *entry;
+
+       /* The ring should never get completely full */
+       WARN_ON_ONCE(kvm_dirty_ring_full(ring));
+
+       entry = &ring->dirty_gfns[ring->dirty_index & (ring->size - 1)];
+
+       entry->slot = slot;
+       entry->offset = offset;
+       /*
+        * Make sure the data is filled in before we publish this to
+        * the userspace program.  There's no paired kernel-side reader.
+        */
+       smp_wmb();
+       kvm_dirty_gfn_set_dirtied(entry);
+       ring->dirty_index++;
+       trace_kvm_dirty_ring_push(ring, slot, offset);
+}
+
+struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset)
+{
+       return vmalloc_to_page((void *)ring->dirty_gfns + offset * PAGE_SIZE);
+}
+
+void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
+{
+       vfree(ring->dirty_gfns);
+       ring->dirty_gfns = NULL;
+}
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c2323c2..e996989 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -191,8 +191,12 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
        struct kvm *kvm = irqfd->kvm;
        unsigned seq;
        int idx;
+       int ret = 0;
 
        if (flags & EPOLLIN) {
+               u64 cnt;
+               eventfd_ctx_do_read(irqfd->eventfd, &cnt);
+
                idx = srcu_read_lock(&kvm->irq_srcu);
                do {
                        seq = read_seqcount_begin(&irqfd->irq_entry_sc);
@@ -204,6 +208,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
                                              false) == -EWOULDBLOCK)
                        schedule_work(&irqfd->inject);
                srcu_read_unlock(&kvm->irq_srcu, idx);
+               ret = 1;
        }
 
        if (flags & EPOLLHUP) {
@@ -227,7 +232,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
                spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
        }
 
-       return 0;
+       return ret;
 }
 
 static void
@@ -236,7 +241,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 {
        struct kvm_kernel_irqfd *irqfd =
                container_of(pt, struct kvm_kernel_irqfd, pt);
-       add_wait_queue(wqh, &irqfd->wait);
+       add_wait_queue_priority(wqh, &irqfd->wait);
 }
 
 /* Must be called under irqfds.lock */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2541a17..5f26048 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -63,6 +63,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
 
+#include <linux/kvm_dirty_ring.h>
+
 /* Worst case buffer size needed for holding an integer. */
 #define ITOA_MAX_LEN 12
 
@@ -415,6 +417,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 
 void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+       kvm_dirty_ring_free(&vcpu->dirty_ring);
        kvm_arch_vcpu_destroy(vcpu);
 
        /*
@@ -1362,7 +1365,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
        /* Allocate/free page dirty bitmap as needed */
        if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
                new.dirty_bitmap = NULL;
-       else if (!new.dirty_bitmap) {
+       else if (!new.dirty_bitmap && !kvm->dirty_ring_size) {
                r = kvm_alloc_dirty_bitmap(&new);
                if (r)
                        return r;
@@ -1423,6 +1426,10 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
        unsigned long n;
        unsigned long any = 0;
 
+       /* Dirty ring tracking and dirty bitmap tracking are mutually exclusive */
+       if (kvm->dirty_ring_size)
+               return -ENXIO;
+
        *memslot = NULL;
        *is_dirty = 0;
 
@@ -1484,6 +1491,10 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
        unsigned long *dirty_bitmap_buffer;
        bool flush;
 
+       /* Dirty ring tracking and dirty bitmap tracking are mutually exclusive */
+       if (kvm->dirty_ring_size)
+               return -ENXIO;
+
        as_id = log->slot >> 16;
        id = (u16)log->slot;
        if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
@@ -1592,6 +1603,10 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
        unsigned long *dirty_bitmap_buffer;
        bool flush;
 
+       /* Dirty ring tracking and dirty bitmap tracking are mutually exclusive */
+       if (kvm->dirty_ring_size)
+               return -ENXIO;
+
        as_id = log->slot >> 16;
        id = (u16)log->slot;
        if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
@@ -2196,7 +2211,8 @@ int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_map);
 
-static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
+static void __kvm_unmap_gfn(struct kvm *kvm,
+                       struct kvm_memory_slot *memslot,
                        struct kvm_host_map *map,
                        struct gfn_to_pfn_cache *cache,
                        bool dirty, bool atomic)
@@ -2221,7 +2237,7 @@ static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
 #endif
 
        if (dirty)
-               mark_page_dirty_in_slot(memslot, map->gfn);
+               mark_page_dirty_in_slot(kvm, memslot, map->gfn);
 
        if (cache)
                cache->dirty |= dirty;
@@ -2235,7 +2251,7 @@ static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
 int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
                  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
 {
-       __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map,
+       __kvm_unmap_gfn(vcpu->kvm, gfn_to_memslot(vcpu->kvm, map->gfn), map,
                        cache, dirty, atomic);
        return 0;
 }
@@ -2243,8 +2259,8 @@ EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
 
 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
 {
-       __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL,
-                       dirty, false);
+       __kvm_unmap_gfn(vcpu->kvm, kvm_vcpu_gfn_to_memslot(vcpu, map->gfn),
+                       map, NULL, dirty, false);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
 
@@ -2418,7 +2434,8 @@ int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
 
-static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
+static int __kvm_write_guest_page(struct kvm *kvm,
+                                 struct kvm_memory_slot *memslot, gfn_t gfn,
                                  const void *data, int offset, int len)
 {
        int r;
@@ -2430,7 +2447,7 @@ static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
        r = __copy_to_user((void __user *)addr + offset, data, len);
        if (r)
                return -EFAULT;
-       mark_page_dirty_in_slot(memslot, gfn);
+       mark_page_dirty_in_slot(kvm, memslot, gfn);
        return 0;
 }
 
@@ -2439,7 +2456,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
 {
        struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 
-       return __kvm_write_guest_page(slot, gfn, data, offset, len);
+       return __kvm_write_guest_page(kvm, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
 
@@ -2448,7 +2465,7 @@ int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
 {
        struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
 
-       return __kvm_write_guest_page(slot, gfn, data, offset, len);
+       return __kvm_write_guest_page(vcpu->kvm, slot, gfn, data, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
 
@@ -2567,7 +2584,7 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
        r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
        if (r)
                return -EFAULT;
-       mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
+       mark_page_dirty_in_slot(kvm, ghc->memslot, gpa >> PAGE_SHIFT);
 
        return 0;
 }
@@ -2616,23 +2633,16 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
 
-int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
-{
-       const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
-
-       return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
-}
-EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
-
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 {
+       const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
        gfn_t gfn = gpa >> PAGE_SHIFT;
        int seg;
        int offset = offset_in_page(gpa);
        int ret;
 
        while ((seg = next_segment(len, offset)) != 0) {
-               ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
+               ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, seg);
                if (ret < 0)
                        return ret;
                offset = 0;
@@ -2643,12 +2653,19 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
-void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn)
+void mark_page_dirty_in_slot(struct kvm *kvm,
+                            struct kvm_memory_slot *memslot,
+                            gfn_t gfn)
 {
-       if (memslot && memslot->dirty_bitmap) {
+       if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
                unsigned long rel_gfn = gfn - memslot->base_gfn;
+               u32 slot = (memslot->as_id << 16) | memslot->id;
 
-               set_bit_le(rel_gfn, memslot->dirty_bitmap);
+               if (kvm->dirty_ring_size)
+                       kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
+                                           slot, rel_gfn);
+               else
+                       set_bit_le(rel_gfn, memslot->dirty_bitmap);
        }
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);
@@ -2658,7 +2675,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
        struct kvm_memory_slot *memslot;
 
        memslot = gfn_to_memslot(kvm, gfn);
-       mark_page_dirty_in_slot(memslot, gfn);
+       mark_page_dirty_in_slot(kvm, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty);
 
@@ -2667,7 +2684,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
        struct kvm_memory_slot *memslot;
 
        memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-       mark_page_dirty_in_slot(memslot, gfn);
+       mark_page_dirty_in_slot(vcpu->kvm, memslot, gfn);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
 
@@ -3008,6 +3025,17 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
 
+static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff)
+{
+#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+       return (pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) &&
+           (pgoff < KVM_DIRTY_LOG_PAGE_OFFSET +
+            kvm->dirty_ring_size / PAGE_SIZE);
+#else
+       return false;
+#endif
+}
+
 static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
 {
        struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
@@ -3023,6 +3051,10 @@ static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
        else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
                page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
 #endif
+       else if (kvm_page_in_dirty_ring(vcpu->kvm, vmf->pgoff))
+               page = kvm_dirty_ring_get_page(
+                   &vcpu->dirty_ring,
+                   vmf->pgoff - KVM_DIRTY_LOG_PAGE_OFFSET);
        else
                return kvm_arch_vcpu_fault(vcpu, vmf);
        get_page(page);
@@ -3036,6 +3068,14 @@ static const struct vm_operations_struct kvm_vcpu_vm_ops = {
 
 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
 {
+       struct kvm_vcpu *vcpu = file->private_data;
+       unsigned long pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+       if ((kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff) ||
+            kvm_page_in_dirty_ring(vcpu->kvm, vma->vm_pgoff + pages - 1)) &&
+           ((vma->vm_flags & VM_EXEC) || !(vma->vm_flags & VM_SHARED)))
+               return -EINVAL;
+
        vma->vm_ops = &kvm_vcpu_vm_ops;
        return 0;
 }
@@ -3116,7 +3156,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
        }
 
        BUILD_BUG_ON(sizeof(struct kvm_run) > PAGE_SIZE);
-       page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+       page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
        if (!page) {
                r = -ENOMEM;
                goto vcpu_free;
@@ -3129,6 +3169,13 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
        if (r)
                goto vcpu_free_run_page;
 
+       if (kvm->dirty_ring_size) {
+               r = kvm_dirty_ring_alloc(&vcpu->dirty_ring,
+                                        id, kvm->dirty_ring_size);
+               if (r)
+                       goto arch_vcpu_destroy;
+       }
+
        mutex_lock(&kvm->lock);
        if (kvm_get_vcpu_by_id(kvm, id)) {
                r = -EEXIST;
@@ -3162,6 +3209,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 
 unlock_vcpu_destroy:
        mutex_unlock(&kvm->lock);
+       kvm_dirty_ring_free(&vcpu->dirty_ring);
+arch_vcpu_destroy:
        kvm_arch_vcpu_destroy(vcpu);
 vcpu_free_run_page:
        free_page((unsigned long)vcpu->run);
@@ -3634,12 +3683,78 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #endif
        case KVM_CAP_NR_MEMSLOTS:
                return KVM_USER_MEM_SLOTS;
+       case KVM_CAP_DIRTY_LOG_RING:
+#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+               return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
+#else
+               return 0;
+#endif
        default:
                break;
        }
        return kvm_vm_ioctl_check_extension(kvm, arg);
 }
 
+static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size)
+{
+       int r;
+
+       if (!KVM_DIRTY_LOG_PAGE_OFFSET)
+               return -EINVAL;
+
+       /* The ring size must be a power of 2 */
+       if (!size || (size & (size - 1)))
+               return -EINVAL;
+
+       /* Must be big enough to hold the reserved entries, and at least a page */
+       if (size < kvm_dirty_ring_get_rsvd_entries() *
+           sizeof(struct kvm_dirty_gfn) || size < PAGE_SIZE)
+               return -EINVAL;
+
+       if (size > KVM_DIRTY_RING_MAX_ENTRIES *
+           sizeof(struct kvm_dirty_gfn))
+               return -E2BIG;
+
+       /* We only allow the ring size to be set once */
+       if (kvm->dirty_ring_size)
+               return -EINVAL;
+
+       mutex_lock(&kvm->lock);
+
+       if (kvm->created_vcpus) {
+               /* The ring size cannot be changed after vCPUs are created */
+               r = -EINVAL;
+       } else {
+               kvm->dirty_ring_size = size;
+               r = 0;
+       }
+
+       mutex_unlock(&kvm->lock);
+       return r;
+}
+
+static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
+{
+       int i;
+       struct kvm_vcpu *vcpu;
+       int cleared = 0;
+
+       if (!kvm->dirty_ring_size)
+               return -EINVAL;
+
+       mutex_lock(&kvm->slots_lock);
+
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring);
+
+       mutex_unlock(&kvm->slots_lock);
+
+       if (cleared)
+               kvm_flush_remote_tlbs(kvm);
+
+       return cleared;
+}
+
 int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                                                  struct kvm_enable_cap *cap)
 {
@@ -3670,6 +3785,8 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
                kvm->max_halt_poll_ns = cap->args[0];
                return 0;
        }
+       case KVM_CAP_DIRTY_LOG_RING:
+               return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
        default:
                return kvm_vm_ioctl_enable_cap(kvm, cap);
        }
@@ -3854,6 +3971,9 @@ static long kvm_vm_ioctl(struct file *filp,
        case KVM_CHECK_EXTENSION:
                r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
                break;
+       case KVM_RESET_DIRTY_RINGS:
+               r = kvm_vm_ioctl_reset_dirty_pages(kvm);
+               break;
        default:
                r = kvm_arch_vm_ioctl(filp, ioctl, arg);
        }